import numpy as np
import pandas as pd
import investpy
Crypto_BTC = investpy.get_crypto_historical_data(crypto='bitcoin', from_date='01/01/1990', to_date='01/01/2022')
Crypto_ETH = investpy.get_crypto_historical_data(crypto='Ethereum', from_date='01/01/1990', to_date='01/01/2022')
Crypto_USDT = investpy.get_crypto_historical_data(crypto='Tether', from_date='01/01/1990', to_date='01/01/2022')
stock_dow = investpy.indices.get_index_historical_data(index = "DOW 30", country = "United States", from_date='01/01/1990', to_date='01/01/2022')
stock_sp500 = investpy.indices.get_index_historical_data(index = "S&P 500", country = "United States", from_date='01/01/1990', to_date='01/01/2022')
stock_ftse100 = investpy.indices.get_index_historical_data(index = "FTSE 100", country = "United Kingdom", from_date='01/01/1990', to_date='01/01/2022')
stock_spasx200 = investpy.indices.get_index_historical_data(index = "S&P/ASX 200", country = "Australia", from_date='01/01/1990', to_date='01/01/2022')
stock_sptsx = investpy.indices.get_index_historical_data(index = "S&P/TSX", country = "Canada", from_date='01/01/1990', to_date='01/01/2022')
stock_shai = investpy.indices.get_index_historical_data(index = "Shanghai", country = "China", from_date='01/01/1990', to_date='01/01/2022')
stock_cac40 = investpy.indices.get_index_historical_data(index = "CAC 40", country = "France", from_date='01/01/1990', to_date='01/01/2022')
stock_dax = investpy.indices.get_index_historical_data(index = "DAX", country = "Germany", from_date='01/01/1990', to_date='01/01/2022')
stock_hs = investpy.indices.get_index_historical_data(index = "Hang Seng", country = "Hong Kong", from_date='01/01/1990', to_date='01/01/2022')
stock_nfty = investpy.indices.get_index_historical_data(index = "Nifty 50", country = "India", from_date='01/01/1990', to_date='01/01/2022')
stock_nikkei = investpy.indices.get_index_historical_data(index = "Nikkei 225", country = "Japan", from_date='01/01/1990', to_date='01/01/2022')
stock_kospi = investpy.indices.get_index_historical_data(index = "KOSPI", country = "South Korea", from_date='01/01/1990', to_date='01/01/2022')
stock_smi = investpy.indices.get_index_historical_data(index = "SMI", country = "Switzerland", from_date='01/01/1990', to_date='01/01/2022')
stock_set = investpy.indices.get_index_historical_data(index = "SET", country = "Thailand", from_date='01/01/1990', to_date='01/01/2022')
Alter_GOLD = investpy.get_commodity_historical_data(commodity='gold', from_date='01/01/1990', to_date='01/01/2022')
Alter_SILVER = investpy.get_commodity_historical_data(commodity='silver', from_date='01/01/1990', to_date='01/01/2022')
Alter_OIL = investpy.get_commodity_historical_data(commodity='Crude Oil WTI', from_date='01/01/1990', to_date='01/01/2022')
Alter_US10Y = investpy.bonds.get_bond_historical_data(bond = "U.S. 10Y", from_date='01/01/1990', to_date='01/01/2022')
Alter_US2Y = investpy.bonds.get_bond_historical_data(bond = "U.S. 2Y", from_date='01/01/1990', to_date='01/01/2022')
Alter_US3M = investpy.bonds.get_bond_historical_data(bond = "U.S. 3M", from_date='01/01/1990', to_date='01/01/2022')
Alter_Wheat = investpy.get_commodity_historical_data(commodity='US Wheat', from_date='01/01/1990', to_date='01/01/2022')
Alter_CORN = investpy.get_commodity_historical_data(commodity='US Corn', from_date='01/01/1990', to_date='01/01/2022')
Alter_SOYBEAN = investpy.get_commodity_historical_data(commodity='US Soybeans', from_date='01/01/1990', to_date='01/01/2022')
Alter_DXY = investpy.indices.get_index_historical_data(index = "US Dollar Index", country = "United States", from_date='01/01/1990', to_date='01/01/2022')
# Alter_GOLD
# Alter_OIL
# Alter_US10Y
# Alter_US2Y
# Alter_US3M
# Alter_DXY
# Crypto_BTC
# Crypto_ETH
# Crypto_USDT
# stock_dow
# stock_sp500
# stock_ftse100
# stock_spasx200
# stock_sptsx
# stock_shai
# stock_cac40
# stock_dax
# stock_hs
# stock_kospi
# stock_nfty
# stock_nikkei
# stock_smi
# stock_set
name_list = ["GOLD","OIL","US_10Y","US_2Y","US_3M","DXY",
"BTC" , "ETH" , "USDT" ,
"Dow" , "SP500" , "FTSE100" , "SPASX200",
"SPTSX" ,"SHAI","CAC40","DAX",
"HS" , "KOSPI" ,"SMI",
"NIKKEI" , "NFTY" ,"SET"]
df_list = ["Alter_GOLD","Alter_OIL","Alter_US10Y","Alter_US2Y","Alter_US3M","Alter_DXY",
"Crypto_BTC","Crypto_ETH","Crypto_USDT",
"stock_dow","stock_sp500","stock_ftse100","stock_spasx200",
"stock_sptsx","stock_shai","stock_cac40","stock_dax",
"stock_hs","stock_kospi","stock_smi",
"stock_nikkei","stock_nfty","stock_set"]
P_list = [i + "_P" for i in name_list]
R_list = [i + "_R" for i in name_list]
grp_list = [i + "_grp" for i in df_list]
Alter_GOLD['return'] = np.log(Alter_GOLD['Close']) - np.log(Alter_GOLD['Close']+0.000001).shift(1)
Alter_GOLD['date'] = Alter_GOLD.index
Alter_GOLD['year'] = Alter_GOLD.date.dt.year.astype(int)
Alter_GOLD['par_month'] = (Alter_GOLD.date.dt.year.astype(str) + Alter_GOLD.date.dt.month.astype(str).str.zfill(2)).astype(int)
Alter_GOLD['par_week'] = (Alter_GOLD.date.dt.year.astype(str) + Alter_GOLD.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Alter_OIL['return'] = np.log(Alter_OIL['Close']) - np.log(Alter_OIL['Close']+0.000001).shift(1)
Alter_OIL['date'] = Alter_OIL.index
Alter_OIL['year'] = Alter_OIL.date.dt.year.astype(int)
Alter_OIL['par_month'] = (Alter_OIL.date.dt.year.astype(str) + Alter_OIL.date.dt.month.astype(str).str.zfill(2)).astype(int)
Alter_OIL['par_week'] = (Alter_OIL.date.dt.year.astype(str) + Alter_OIL.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Alter_US10Y['return'] = np.log(Alter_US10Y['Close']) - np.log(Alter_US10Y['Close']+0.000001).shift(1)
Alter_US10Y['date'] = Alter_US10Y.index
Alter_US10Y['year'] = Alter_US10Y.date.dt.year.astype(int)
Alter_US10Y['par_month'] = (Alter_US10Y.date.dt.year.astype(str) + Alter_US10Y.date.dt.month.astype(str).str.zfill(2)).astype(int)
Alter_US10Y['par_week'] = (Alter_US10Y.date.dt.year.astype(str) + Alter_US10Y.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Alter_US2Y['return'] = np.log(Alter_US2Y['Close']) - np.log(Alter_US2Y['Close']+0.000001).shift(1)
Alter_US2Y['date'] = Alter_US2Y.index
Alter_US2Y['year'] = Alter_US2Y.date.dt.year.astype(int)
Alter_US2Y['par_month'] = (Alter_US2Y.date.dt.year.astype(str) + Alter_US2Y.date.dt.month.astype(str).str.zfill(2)).astype(int)
Alter_US2Y['par_week'] = (Alter_US2Y.date.dt.year.astype(str) + Alter_US2Y.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Alter_US3M['return'] = np.log(Alter_US3M['Close']) - np.log(Alter_US3M['Close']+0.000001).shift(1)
Alter_US3M['date'] = Alter_US3M.index
Alter_US3M['year'] = Alter_US3M.date.dt.year.astype(int)
Alter_US3M['par_month'] = (Alter_US3M.date.dt.year.astype(str) + Alter_US3M.date.dt.month.astype(str).str.zfill(2)).astype(int)
Alter_US3M['par_week'] = (Alter_US3M.date.dt.year.astype(str) + Alter_US3M.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Alter_DXY['return'] = np.log(Alter_DXY['Close']) - np.log(Alter_DXY['Close']+0.000001).shift(1)
Alter_DXY['date'] = Alter_DXY.index
Alter_DXY['year'] = Alter_DXY.date.dt.year.astype(int)
Alter_DXY['par_month'] = (Alter_DXY.date.dt.year.astype(str) + Alter_DXY.date.dt.month.astype(str).str.zfill(2)).astype(int)
Alter_DXY['par_week'] = (Alter_DXY.date.dt.year.astype(str) + Alter_DXY.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Crypto_BTC['return'] = np.log(Crypto_BTC['Close']) - np.log(Crypto_BTC['Close']+0.000001).shift(1)
Crypto_BTC['date'] = Crypto_BTC.index
Crypto_BTC['year'] = Crypto_BTC.date.dt.year.astype(int)
Crypto_BTC['par_month'] = (Crypto_BTC.date.dt.year.astype(str) + Crypto_BTC.date.dt.month.astype(str).str.zfill(2)).astype(int)
Crypto_BTC['par_week'] = (Crypto_BTC.date.dt.year.astype(str) + Crypto_BTC.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Crypto_ETH['return'] = np.log(Crypto_ETH['Close']) - np.log(Crypto_ETH['Close']+0.000001).shift(1)
Crypto_ETH['date'] = Crypto_ETH.index
Crypto_ETH['year'] = Crypto_ETH.date.dt.year.astype(int)
Crypto_ETH['par_month'] = (Crypto_ETH.date.dt.year.astype(str) + Crypto_ETH.date.dt.month.astype(str).str.zfill(2)).astype(int)
Crypto_ETH['par_week'] = (Crypto_ETH.date.dt.year.astype(str) + Crypto_ETH.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
Crypto_USDT['return'] = np.log(Crypto_USDT['Close']) - np.log(Crypto_USDT['Close']+0.000001).shift(1)
Crypto_USDT['date'] = Crypto_USDT.index
Crypto_USDT['year'] = Crypto_USDT.date.dt.year.astype(int)
Crypto_USDT['par_month'] = (Crypto_USDT.date.dt.year.astype(str) + Crypto_USDT.date.dt.month.astype(str).str.zfill(2)).astype(int)
Crypto_USDT['par_week'] = (Crypto_USDT.date.dt.year.astype(str) + Crypto_USDT.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_dow['return'] = np.log(stock_dow['Close']) - np.log(stock_dow['Close']+0.000001).shift(1)
stock_dow['date'] = stock_dow.index
stock_dow['year'] = stock_dow.date.dt.year.astype(int)
stock_dow['par_month'] = (stock_dow.date.dt.year.astype(str) + stock_dow.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_dow['par_week'] = (stock_dow.date.dt.year.astype(str) + stock_dow.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_sp500['return'] = np.log(stock_sp500['Close']) - np.log(stock_sp500['Close']+0.000001).shift(1)
stock_sp500['date'] = stock_sp500.index
stock_sp500['year'] = stock_sp500.date.dt.year.astype(int)
stock_sp500['par_month'] = (stock_sp500.date.dt.year.astype(str) + stock_sp500.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_sp500['par_week'] = (stock_sp500.date.dt.year.astype(str) + stock_sp500.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_ftse100['return'] = np.log(stock_ftse100['Close']) - np.log(stock_ftse100['Close']+0.000001).shift(1)
stock_ftse100['date'] = stock_ftse100.index
stock_ftse100['year'] = stock_ftse100.date.dt.year.astype(int)
stock_ftse100['par_month'] = (stock_ftse100.date.dt.year.astype(str) + stock_ftse100.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_ftse100['par_week'] = (stock_ftse100.date.dt.year.astype(str) + stock_ftse100.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_spasx200['return'] = np.log(stock_spasx200['Close']) - np.log(stock_spasx200['Close']+0.000001).shift(1)
stock_spasx200['date'] = stock_spasx200.index
stock_spasx200['year'] = stock_spasx200.date.dt.year.astype(int)
stock_spasx200['par_month'] = (stock_spasx200.date.dt.year.astype(str) + stock_spasx200.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_spasx200['par_week'] = (stock_spasx200.date.dt.year.astype(str) + stock_spasx200.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_sptsx['return'] = np.log(stock_sptsx['Close']) - np.log(stock_sptsx['Close']+0.000001).shift(1)
stock_sptsx['date'] = stock_sptsx.index
stock_sptsx['year'] = stock_sptsx.date.dt.year.astype(int)
stock_sptsx['par_month'] = (stock_sptsx.date.dt.year.astype(str) + stock_sptsx.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_sptsx['par_week'] = (stock_sptsx.date.dt.year.astype(str) + stock_sptsx.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_shai['return'] = np.log(stock_shai['Close']) - np.log(stock_shai['Close']+0.000001).shift(1)
stock_shai['date'] = stock_shai.index
stock_shai['year'] = stock_shai.date.dt.year.astype(int)
stock_shai['par_month'] = (stock_shai.date.dt.year.astype(str) + stock_shai.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_shai['par_week'] = (stock_shai.date.dt.year.astype(str) + stock_shai.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_cac40['return'] = np.log(stock_cac40['Close']) - np.log(stock_cac40['Close']+0.000001).shift(1)
stock_cac40['date'] = stock_cac40.index
stock_cac40['year'] = stock_cac40.date.dt.year.astype(int)
stock_cac40['par_month'] = (stock_cac40.date.dt.year.astype(str) + stock_cac40.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_cac40['par_week'] = (stock_cac40.date.dt.year.astype(str) + stock_cac40.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_dax['return'] = np.log(stock_dax['Close']) - np.log(stock_dax['Close']+0.000001).shift(1)
stock_dax['date'] = stock_dax.index
stock_dax['year'] = stock_dax.date.dt.year.astype(int)
stock_dax['par_month'] = (stock_dax.date.dt.year.astype(str) + stock_dax.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_dax['par_week'] = (stock_dax.date.dt.year.astype(str) + stock_dax.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_hs['return'] = np.log(stock_hs['Close']) - np.log(stock_hs['Close']+0.000001).shift(1)
stock_hs['date'] = stock_hs.index
stock_hs['year'] = stock_hs.date.dt.year.astype(int)
stock_hs['par_month'] = (stock_hs.date.dt.year.astype(str) + stock_hs.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_hs['par_week'] = (stock_hs.date.dt.year.astype(str) + stock_hs.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_kospi['return'] = np.log(stock_kospi['Close']) - np.log(stock_kospi['Close']+0.000001).shift(1)
stock_kospi['date'] = stock_kospi.index
stock_kospi['year'] = stock_kospi.date.dt.year.astype(int)
stock_kospi['par_month'] = (stock_kospi.date.dt.year.astype(str) + stock_kospi.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_kospi['par_week'] = (stock_kospi.date.dt.year.astype(str) + stock_kospi.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_smi['return'] = np.log(stock_smi['Close']) - np.log(stock_smi['Close']+0.000001).shift(1)
stock_smi['date'] = stock_smi.index
stock_smi['year'] = stock_smi.date.dt.year.astype(int)
stock_smi['par_month'] = (stock_smi.date.dt.year.astype(str) + stock_smi.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_smi['par_week'] = (stock_smi.date.dt.year.astype(str) + stock_smi.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_nikkei['return'] = np.log(stock_nikkei['Close']) - np.log(stock_nikkei['Close']+0.000001).shift(1)
stock_nikkei['date'] = stock_nikkei.index
stock_nikkei['year'] = stock_nikkei.date.dt.year.astype(int)
stock_nikkei['par_month'] = (stock_nikkei.date.dt.year.astype(str) + stock_nikkei.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_nikkei['par_week'] = (stock_nikkei.date.dt.year.astype(str) + stock_nikkei.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_nfty['return'] = np.log(stock_nfty['Close']) - np.log(stock_nfty['Close']+0.000001).shift(1)
stock_nfty['date'] = stock_nfty.index
stock_nfty['year'] = stock_nfty.date.dt.year.astype(int)
stock_nfty['par_month'] = (stock_nfty.date.dt.year.astype(str) + stock_nfty.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_nfty['par_week'] = (stock_nfty.date.dt.year.astype(str) + stock_nfty.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
stock_set['return'] = np.log(stock_set['Close']) - np.log(stock_set['Close']+0.000001).shift(1)
stock_set['date'] = stock_set.index
stock_set['year'] = stock_set.date.dt.year.astype(int)
stock_set['par_month'] = (stock_set.date.dt.year.astype(str) + stock_set.date.dt.month.astype(str).str.zfill(2)).astype(int)
stock_set['par_week'] = (stock_set.date.dt.year.astype(str) + stock_set.date.dt.isocalendar().week.astype(str).str.zfill(2)).astype(int)
C:\Users\wann\anaconda3\lib\site-packages\pandas\core\series.py:726: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs) C:\Users\wann\anaconda3\lib\site-packages\pandas\core\series.py:726: RuntimeWarning: divide by zero encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
for index, value in enumerate(df_list):
agg_list_p = ["last"]
agg_list_r = ["std","mean"]
close_col = "Close"
ret_col = "return"
# exec("{x} = {y}.groupby(['par_month']).agg({close_col:agg_list_p , ret_col:agg_list_r}).reset_index() ".format(y = df_list[index], x = grp_list[index]))
exec("{x} = {y}.groupby(['par_week'])[['Close']].last().reset_index()".format(y = df_list[index], x = grp_list[index]))
name_p = P_list[index]
name_agg = ["par_week"]
for i in agg_list_p:
name_agg.append(name_p + "_" + i)
# name_r = R_list[index]
# for i in agg_list_r:
# name_agg.append(name_r + "_" + i)
exec("{x}.columns = name_agg".format(x =grp_list[index] ))
# joined
# prep_merge = pd.merge( Crypto_BTC_grp ,
# pd.merge(Stock_DJ_grp ,
# Alter_GOLD_grp ,
# how = "left",
# on = ["par_month"]),
# how = "left" , on = ["par_month"])
# prep_merge_r = np.log(prep_merge.filter(regex='last')) - np.log(prep_merge.filter(regex='last').shift(1))
# concatenated_prep = pd.concat([prep_merge[[i for i in prep_merge.columns if i not in prep_merge_r.columns]], prep_merge_r], axis=1)
prep_merge_data = Alter_GOLD_grp
for index, value in enumerate(grp_list[1:]):
exec('prep_merge_data= pd.merge(prep_merge_data , {x} , how = "left", on = ["par_week"]) '.format(x= grp_list[1:][index]))
prep_merge_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1635 entries, 0 to 1634 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 par_week 1635 non-null int64 1 GOLD_P_last 1635 non-null float64 2 OIL_P_last 1633 non-null float64 3 US_10Y_P_last 1634 non-null float64 4 US_2Y_P_last 1628 non-null float64 5 US_3M_P_last 1630 non-null float64 6 DXY_P_last 1635 non-null float64 7 BTC_P_last 564 non-null float64 8 ETH_P_last 269 non-null float64 9 USDT_P_last 212 non-null float64 10 Dow_P_last 747 non-null float64 11 SP500_P_last 799 non-null float64 12 FTSE100_P_last 1059 non-null float64 13 SPASX200_P_last 1507 non-null float64 14 SPTSX_P_last 1633 non-null float64 15 SHAI_P_last 1538 non-null float64 16 CAC40_P_last 1633 non-null float64 17 DAX_P_last 1059 non-null float64 18 HS_P_last 1063 non-null float64 19 KOSPI_P_last 1611 non-null float64 20 SMI_P_last 1631 non-null float64 21 NIKKEI_P_last 1055 non-null float64 22 NFTY_P_last 1329 non-null float64 23 SET_P_last 1630 non-null float64 dtypes: float64(23), int64(1) memory usage: 319.3 KB
prep_merge_data_201715 = prep_merge_data[prep_merge_data['par_week'] > 201715][prep_merge_data['par_week'] < 202150]
<ipython-input-14-4629dc12f7f9>:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index. prep_merge_data_201715 = prep_merge_data[prep_merge_data['par_week'] > 201715][prep_merge_data['par_week'] < 202150]
prep_merge_data_201715.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 210 entries, 1424 to 1633 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 par_week 210 non-null int64 1 GOLD_P_last 210 non-null float64 2 OIL_P_last 210 non-null float64 3 US_10Y_P_last 210 non-null float64 4 US_2Y_P_last 210 non-null float64 5 US_3M_P_last 210 non-null float64 6 DXY_P_last 210 non-null float64 7 BTC_P_last 210 non-null float64 8 ETH_P_last 210 non-null float64 9 USDT_P_last 210 non-null float64 10 Dow_P_last 210 non-null float64 11 SP500_P_last 210 non-null float64 12 FTSE100_P_last 210 non-null float64 13 SPASX200_P_last 210 non-null float64 14 SPTSX_P_last 210 non-null float64 15 SHAI_P_last 206 non-null float64 16 CAC40_P_last 210 non-null float64 17 DAX_P_last 210 non-null float64 18 HS_P_last 210 non-null float64 19 KOSPI_P_last 209 non-null float64 20 SMI_P_last 210 non-null float64 21 NIKKEI_P_last 208 non-null float64 22 NFTY_P_last 210 non-null float64 23 SET_P_last 210 non-null float64 dtypes: float64(23), int64(1) memory usage: 41.0 KB
new_strings = []
for string in prep_merge_data_201715.columns:
new_string = string.replace("_P_last", "")
new_strings.append(new_string)
new_strings
['par_week', 'GOLD', 'OIL', 'US_10Y', 'US_2Y', 'US_3M', 'DXY', 'BTC', 'ETH', 'USDT', 'Dow', 'SP500', 'FTSE100', 'SPASX200', 'SPTSX', 'SHAI', 'CAC40', 'DAX', 'HS', 'KOSPI', 'SMI', 'NIKKEI', 'NFTY', 'SET']
prep_merge_data_201715.columns = new_strings
data_for_analysis =prep_merge_data_201715[[i for i in prep_merge_data_201715 if i not in ['USDT']]]
non_par_week_col = [
'BTC',
'ETH',
# 'USDT',
'GOLD',
'OIL',
'US_10Y',
'US_2Y',
'US_3M',
'DXY',
'Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'SHAI',
'KOSPI',
'SMI',
'NIKKEI',
'NFTY',
'SET']
data_for_analysis_ret = data_for_analysis.copy()
for i in non_par_week_col:
exec('data_for_analysis_ret["{x}"] = np.log(data_for_analysis_ret["{x}"]) - np.log(data_for_analysis_ret["{x}"]).shift(1)'.format(x = i))
data_for_analysis_ret.dropna()
| par_week | GOLD | OIL | US_10Y | US_2Y | US_3M | DXY | BTC | ETH | Dow | ... | SPTSX | SHAI | CAC40 | DAX | HS | KOSPI | SMI | NIKKEI | NFTY | SET | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1425 | 201717 | -0.014664 | -0.005862 | 0.014979 | 0.063175 | 0.022757 | -0.009345 | 0.079088 | 0.448467 | 0.018934 | ... | -0.001817 | -0.005844 | 0.040315 | 0.031811 | 0.023558 | 0.018488 | 0.029793 | 0.015402 | 0.020046 | -0.002359 |
| 1426 | 201718 | -0.031138 | -0.065120 | 0.034229 | 0.046534 | 0.105486 | -0.004047 | 0.139579 | 0.120560 | 0.003167 | ... | -0.000262 | -0.016498 | 0.030857 | 0.022174 | -0.005654 | 0.016102 | 0.022884 | 0.027049 | -0.002017 | 0.001722 |
| 1427 | 201719 | -0.000077 | 0.034450 | -0.015202 | -0.024193 | -0.011312 | 0.006064 | 0.131358 | -0.041614 | -0.005266 | ... | -0.002838 | -0.006314 | -0.004979 | 0.004200 | 0.027403 | 0.019783 | 0.011770 | 0.007821 | 0.012373 | -0.016114 |
| 1428 | 201720 | 0.018364 | 0.050739 | -0.033783 | -0.006044 | 0.031358 | -0.021489 | 0.142559 | 0.480653 | -0.004401 | ... | -0.005124 | 0.002306 | -0.015102 | -0.010368 | 0.000736 | 0.001076 | -0.011121 | -0.010587 | 0.002868 | 0.003685 |
| 1429 | 201721 | -0.047489 | -0.007779 | 0.001423 | 0.008744 | 0.030404 | 0.003084 | 0.068438 | 0.102476 | 0.013152 | ... | -0.002690 | 0.006267 | 0.002296 | -0.002893 | 0.018279 | 0.028780 | 0.002161 | 0.000584 | 0.017579 | 0.012588 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1629 | 202112 | -0.005412 | -0.010968 | -0.020361 | -0.072684 | 0.287682 | 0.009205 | -0.028612 | -0.055868 | 0.013544 | ... | -0.005394 | 0.004007 | -0.001527 | 0.008712 | -0.022835 | 0.000487 | 0.013534 | -0.020871 | -0.016184 | 0.006945 |
| 1630 | 202113 | -0.003354 | 0.011457 | 0.019540 | 0.269560 | 0.000000 | 0.002691 | 0.042734 | 0.207581 | 0.002426 | ... | 0.012598 | 0.019141 | 0.018881 | 0.023998 | 0.021033 | 0.023333 | 0.000110 | 0.022948 | 0.024516 | 0.013503 |
| 1631 | 202114 | 0.009684 | -0.035277 | -0.018285 | -0.122957 | -0.430783 | -0.009288 | 0.030106 | 0.035714 | 0.019339 | ... | 0.012440 | -0.009722 | 0.010829 | 0.008371 | -0.008326 | 0.006111 | 0.010779 | -0.002883 | -0.002188 | -0.018928 |
| 1632 | 202115 | 0.020272 | 0.062250 | -0.059293 | -0.009877 | 0.143101 | -0.006532 | -0.064946 | 0.039084 | 0.011767 | ... | 0.006392 | -0.006997 | 0.018892 | 0.014700 | 0.009395 | 0.021086 | 0.002173 | -0.002849 | -0.014736 | -0.011158 |
| 1633 | 202116 | -0.001125 | -0.015806 | -0.011856 | -0.010602 | 0.287682 | -0.007675 | -0.124230 | -0.022242 | -0.004606 | ... | -0.012950 | 0.013781 | -0.004644 | -0.011720 | 0.003757 | -0.003922 | -0.005558 | -0.022580 | -0.019096 | 0.002985 |
197 rows × 23 columns
import seaborn as sns
import matplotlib.pyplot as plt
df_for_cor = data_for_analysis_ret.dropna()[non_par_week_col]
correlation_mat = df_for_cor.corr()
fig, ax = plt.subplots(figsize=(15,15))
sns.heatmap(correlation_mat, annot = True)
plt.show()
# import matplotlib.pyplot as plt
# # Sample figsize in inches
# sns.heatmap(df1.iloc[:, 1:6:], annot=True, linewidths=.5, ax=ax)
sns.pairplot(df_for_cor)
plt.show()
# plan for tomorrow
# analysis dimension
#1 Cryto correlation
#2 Cryto with USD currency
#3 Cryto with alternative
#4 Cryto with stock
# the important is to explain different characteristic between 3 cryto value
# create regression
# pca
# done
data_for_analysis
| par_week | GOLD | OIL | US_10Y | US_2Y | US_3M | DXY | BTC | ETH | Dow | ... | SPTSX | SHAI | CAC40 | DAX | HS | KOSPI | SMI | NIKKEI | NFTY | SET | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1424 | 201716 | 1367.00 | 49.62 | 2.2463 | 1.1884 | 0.782 | 99.98 | 1249.1 | 53.33 | 20547.76 | ... | 15614.48 | 3173.15 | 5059.20 | 12048.57 | 24042.02 | 2165.04 | 8553.99 | 18909.00 | 9119.40 | 1570.02 |
| 1425 | 201717 | 1347.10 | 49.33 | 2.2802 | 1.2659 | 0.800 | 99.05 | 1351.9 | 83.51 | 20940.51 | ... | 15586.13 | 3154.66 | 5267.33 | 12438.01 | 24615.13 | 2205.44 | 8812.67 | 19202.50 | 9304.05 | 1566.32 |
| 1426 | 201718 | 1305.80 | 46.22 | 2.3596 | 1.3262 | 0.889 | 98.65 | 1554.4 | 94.21 | 21006.94 | ... | 15582.04 | 3103.04 | 5432.40 | 12716.89 | 24476.35 | 2241.24 | 9016.66 | 19729.00 | 9285.30 | 1569.02 |
| 1427 | 201719 | 1305.70 | 47.84 | 2.3240 | 1.2945 | 0.879 | 99.25 | 1772.6 | 90.37 | 20896.61 | ... | 15537.88 | 3083.51 | 5405.42 | 12770.41 | 25156.34 | 2286.02 | 9123.41 | 19883.90 | 9400.90 | 1543.94 |
| 1428 | 201720 | 1329.90 | 50.33 | 2.2468 | 1.2867 | 0.907 | 97.14 | 2044.2 | 146.14 | 20804.84 | ... | 15458.46 | 3090.63 | 5324.40 | 12638.69 | 25174.87 | 2288.48 | 9022.51 | 19674.50 | 9427.90 | 1549.64 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1629 | 202112 | 1732.30 | 60.75 | 1.6724 | 0.1406 | 0.020 | 92.77 | 55765.2 | 1686.80 | 33072.88 | ... | 18752.58 | 3418.33 | 5988.81 | 14748.94 | 28336.43 | 3041.01 | 11116.81 | 29176.70 | 14507.30 | 1574.86 |
| 1630 | 202113 | 1726.50 | 61.45 | 1.7054 | 0.1841 | 0.020 | 93.02 | 58199.9 | 2075.94 | 33153.21 | ... | 18990.32 | 3484.39 | 6102.96 | 15107.17 | 28938.74 | 3112.80 | 11118.03 | 29854.00 | 14867.35 | 1596.27 |
| 1631 | 202114 | 1743.30 | 59.32 | 1.6745 | 0.1628 | 0.013 | 92.16 | 59978.7 | 2151.42 | 33800.60 | ... | 19228.03 | 3450.68 | 6169.41 | 15234.16 | 28698.80 | 3131.88 | 11238.52 | 29768.06 | 14834.85 | 1566.34 |
| 1632 | 202115 | 1779.00 | 63.13 | 1.5781 | 0.1612 | 0.015 | 91.56 | 56207.1 | 2237.17 | 34200.67 | ... | 19351.32 | 3426.62 | 6287.07 | 15459.75 | 28969.71 | 3198.62 | 11262.97 | 29683.37 | 14617.85 | 1548.96 |
| 1633 | 202116 | 1776.75 | 62.05 | 1.5595 | 0.1595 | 0.020 | 90.86 | 49946.5 | 2279.22 | 34043.49 | ... | 19102.33 | 3474.17 | 6257.94 | 15279.62 | 29078.75 | 3186.10 | 11200.54 | 29020.63 | 14341.35 | 1553.59 |
210 rows × 23 columns
from statsmodels.regression.linear_model import OLS
model_df= data_for_analysis_ret.dropna()
data_for_analysis.columns
Index(['par_week', 'GOLD', 'OIL', 'US_10Y', 'US_2Y', 'US_3M', 'DXY', 'BTC',
'ETH', 'Dow', 'SP500', 'FTSE100', 'SPASX200', 'SPTSX', 'SHAI', 'CAC40',
'DAX', 'HS', 'KOSPI', 'SMI', 'NIKKEI', 'NFTY', 'SET'],
dtype='object')
for i in non_par_week_col:
exec('model_df["{y}_1"] = model_df["{y}"].shift(1)'.format(y = i))
exec('model_df["{y}_2"] = model_df["{y}"].shift(2)'.format(y = i))
exec('model_df["{y}_3"] = model_df["{y}"].shift(3)'.format(y = i))
exec('model_df["{y}_4"] = model_df["{y}"].shift(4)'.format(y = i))
exec('model_df["{y}_5"] = model_df["{y}"].shift(5)'.format(y = i))
<string>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
model_df.columns[:100]
Index(['par_week', 'GOLD', 'OIL', 'US_10Y', 'US_2Y', 'US_3M', 'DXY', 'BTC',
'ETH', 'Dow', 'SP500', 'FTSE100', 'SPASX200', 'SPTSX', 'SHAI', 'CAC40',
'DAX', 'HS', 'KOSPI', 'SMI', 'NIKKEI', 'NFTY', 'SET', 'BTC_1', 'BTC_2',
'BTC_3', 'BTC_4', 'BTC_5', 'ETH_1', 'ETH_2', 'ETH_3', 'ETH_4', 'ETH_5',
'GOLD_1', 'GOLD_2', 'GOLD_3', 'GOLD_4', 'GOLD_5', 'OIL_1', 'OIL_2',
'OIL_3', 'OIL_4', 'OIL_5', 'US_10Y_1', 'US_10Y_2', 'US_10Y_3',
'US_10Y_4', 'US_10Y_5', 'US_2Y_1', 'US_2Y_2', 'US_2Y_3', 'US_2Y_4',
'US_2Y_5', 'US_3M_1', 'US_3M_2', 'US_3M_3', 'US_3M_4', 'US_3M_5',
'DXY_1', 'DXY_2', 'DXY_3', 'DXY_4', 'DXY_5', 'Dow_1', 'Dow_2', 'Dow_3',
'Dow_4', 'Dow_5', 'SP500_1', 'SP500_2', 'SP500_3', 'SP500_4', 'SP500_5',
'FTSE100_1', 'FTSE100_2', 'FTSE100_3', 'FTSE100_4', 'FTSE100_5',
'SPASX200_1', 'SPASX200_2', 'SPASX200_3', 'SPASX200_4', 'SPASX200_5',
'SPTSX_1', 'SPTSX_2', 'SPTSX_3', 'SPTSX_4', 'SPTSX_5', 'CAC40_1',
'CAC40_2', 'CAC40_3', 'CAC40_4', 'CAC40_5', 'DAX_1', 'DAX_2', 'DAX_3',
'DAX_4', 'DAX_5', 'HS_1', 'HS_2'],
dtype='object')
model_df_lag = model_df.dropna().iloc[:-10,:]
# no lag
ols_1 = OLS(model_df['BTC'] , model_df[['GOLD','Dow','DXY']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.229
Model: OLS Adj. R-squared (uncentered): 0.217
Method: Least Squares F-statistic: 19.17
Date: Fri, 23 Apr 2021 Prob (F-statistic): 6.26e-11
Time: 22:41:57 Log-Likelihood: 71.407
No. Observations: 197 AIC: -136.8
Df Residuals: 194 BIC: -127.0
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 0.1588 0.454 0.350 0.727 -0.737 1.055
Dow 1.6239 0.373 4.348 0.000 0.887 2.360
DXY -4.4480 1.312 -3.389 0.001 -7.036 -1.860
==============================================================================
Omnibus: 55.022 Durbin-Watson: 2.645
Prob(Omnibus): 0.000 Jarque-Bera (JB): 912.115
Skew: -0.459 Prob(JB): 8.64e-199
Kurtosis: 13.501 Cond. No. 4.42
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# lag1 2 3 4 5
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['GOLD', 'GOLD_1','GOLD_4' ,
# 'SP500','SP500_1','SP500_4',
'Dow', 'Dow_1' , 'Dow_4' ,
'DXY', 'DXY_1' , 'DXY_4' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.286
Model: OLS Adj. R-squared (uncentered): 0.249
Method: Least Squares F-statistic: 7.717
Date: Fri, 23 Apr 2021 Prob (F-statistic): 1.65e-09
Time: 23:20:14 Log-Likelihood: 68.559
No. Observations: 182 AIC: -119.1
Df Residuals: 173 BIC: -90.28
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -0.0841 0.500 -0.168 0.867 -1.070 0.902
GOLD_1 -0.7195 0.496 -1.451 0.149 -1.698 0.259
GOLD_4 0.4551 0.466 0.976 0.330 -0.465 1.375
Dow 1.7294 0.391 4.420 0.000 0.957 2.502
Dow_1 0.4741 0.389 1.219 0.225 -0.294 1.242
Dow_4 0.0906 0.389 0.233 0.816 -0.678 0.859
DXY -4.0414 1.447 -2.794 0.006 -6.897 -1.186
DXY_1 2.9841 1.450 2.058 0.041 0.123 5.845
DXY_4 -1.7878 1.372 -1.303 0.194 -4.497 0.921
==============================================================================
Omnibus: 47.743 Durbin-Watson: 2.666
Prob(Omnibus): 0.000 Jarque-Bera (JB): 514.583
Skew: -0.541 Prob(JB): 1.82e-112
Kurtosis: 11.166 Cond. No. 6.37
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 btc w stock
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['Dow','Dow_1', 'Dow_4',
'SP500','SP500_1', 'SP500_4',
'FTSE100','FTSE100_1' ,'FTSE100_4',
'SPASX200','SPASX200_1', 'SPASX200_4',
'SPTSX','SPTSX_1','SPTSX_4',
'CAC40','CAC40_1', 'CAC40_4',
'DAX','DAX_1', 'DAX_4',
'HS','HS_1', 'HS_4',
'KOSPI','KOSPI_1', 'KOSPI_4',
'SMI','SMI_1', 'SMI_4',
'SHAI','SHAI_1','SHAI_4',
'NIKKEI','NIKKEI_1','NIKKEI_4',
'NFTY_1','NFTY_1', 'NFTY_4',
'SET','SET_1', 'SET_4']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.697
Model: OLS Adj. R-squared (uncentered): 0.609
Method: Least Squares F-statistic: 7.924
Date: Fri, 23 Apr 2021 Prob (F-statistic): 6.97e-21
Time: 23:20:14 Log-Likelihood: 146.61
No. Observations: 182 AIC: -211.2
Df Residuals: 141 BIC: -79.85
Df Model: 41
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow -2.5560 1.584 -1.613 0.109 -5.688 0.576
Dow_1 0.0927 1.611 0.058 0.954 -3.091 3.277
Dow_4 -2.4343 1.579 -1.542 0.125 -5.556 0.687
SP500 2.0204 1.748 1.156 0.250 -1.436 5.477
SP500_1 1.5262 1.715 0.890 0.375 -1.865 4.917
SP500_4 -0.4386 1.731 -0.253 0.800 -3.860 2.983
FTSE100 1.6555 0.926 1.788 0.076 -0.175 3.486
FTSE100_1 0.3548 0.945 0.375 0.708 -1.514 2.223
FTSE100_4 -1.1447 0.921 -1.243 0.216 -2.965 0.675
SPASX200 0.9508 0.845 1.126 0.262 -0.719 2.620
SPASX200_1 -0.8927 0.809 -1.103 0.272 -2.492 0.707
SPASX200_4 0.5511 0.717 0.768 0.444 -0.867 1.970
SPTSX 1.9066 1.109 1.718 0.088 -0.287 4.100
SPTSX_1 -0.5316 1.124 -0.473 0.637 -2.754 1.690
SPTSX_4 2.9987 1.095 2.740 0.007 0.835 5.163
CAC40 2.4046 1.033 2.328 0.021 0.362 4.447
CAC40_1 -2.0465 1.015 -2.017 0.046 -4.052 -0.041
CAC40_4 0.2589 0.971 0.267 0.790 -1.661 2.179
DAX -2.5076 0.993 -2.526 0.013 -4.470 -0.545
DAX_1 1.1662 0.971 1.201 0.232 -0.753 3.085
DAX_4 -1.7648 0.971 -1.817 0.071 -3.685 0.155
HS 1.1516 0.594 1.938 0.055 -0.023 2.326
HS_1 0.2352 0.567 0.415 0.679 -0.885 1.355
HS_4 0.9526 0.601 1.586 0.115 -0.235 2.140
KOSPI -0.6435 0.739 -0.870 0.386 -2.105 0.818
KOSPI_1 -0.4557 0.752 -0.606 0.545 -1.942 1.031
KOSPI_4 0.0067 0.787 0.009 0.993 -1.550 1.563
SMI 0.1649 0.879 0.188 0.852 -1.573 1.903
SMI_1 -0.2576 0.891 -0.289 0.773 -2.019 1.504
SMI_4 -0.0268 0.837 -0.032 0.974 -1.681 1.627
SHAI -0.0314 0.523 -0.060 0.952 -1.065 1.002
SHAI_1 -0.6643 0.533 -1.246 0.215 -1.719 0.390
SHAI_4 -0.5286 0.547 -0.966 0.336 -1.610 0.553
NIKKEI -1.2939 0.701 -1.846 0.067 -2.680 0.092
NIKKEI_1 0.7500 0.681 1.101 0.273 -0.597 2.097
NIKKEI_4 1.9740 0.668 2.955 0.004 0.653 3.295
NFTY_1 0.0011 0.301 0.004 0.997 -0.594 0.596
NFTY_1 0.0011 0.301 0.004 0.997 -0.594 0.596
NFTY_4 0.6776 0.560 1.211 0.228 -0.429 1.784
SET -0.1943 0.618 -0.314 0.754 -1.416 1.028
SET_1 -0.1070 0.658 -0.163 0.871 -1.408 1.194
SET_4 -0.2588 0.641 -0.404 0.687 -1.526 1.008
==============================================================================
Omnibus: 10.096 Durbin-Watson: 2.097
Prob(Omnibus): 0.006 Jarque-Bera (JB): 17.487
Skew: -0.246 Prob(JB): 0.000159
Kurtosis: 4.436 Cond. No. 1.28e+16
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The smallest eigenvalue is 1.66e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
import sklearn
from sklearn.preprocessing import StandardScaler
x = model_df_lag[['Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'KOSPI',
'SMI',
'SHAI',
'NIKKEI',
'NFTY',
'SET']]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*5, coeff[i,1]*5, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 10, coeff[i,1] * 10, str(['Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'KOSPI',
'SMI',
'SHAI',
'NIKKEI',
'NFTY',
'SET'][i]), color = 'BLACK', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
#2 btc w stock
ols_1 = OLS(model_df_lag['BTC'] , x_new[:,0:3] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.499
Model: OLS Adj. R-squared (uncentered): 0.491
Method: Least Squares F-statistic: 59.52
Date: Fri, 23 Apr 2021 Prob (F-statistic): 9.69e-27
Time: 23:26:06 Log-Likelihood: 100.81
No. Observations: 182 AIC: -195.6
Df Residuals: 179 BIC: -186.0
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 0.0287 0.003 8.660 0.000 0.022 0.035
x2 0.0519 0.011 4.775 0.000 0.030 0.073
x3 0.1065 0.012 8.987 0.000 0.083 0.130
==============================================================================
Omnibus: 5.360 Durbin-Watson: 2.361
Prob(Omnibus): 0.069 Jarque-Bera (JB): 6.695
Skew: -0.175 Prob(JB): 0.0352
Kurtosis: 3.872 Cond. No. 3.57
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(pca.explained_variance_ratio_)
[0.70114267 0.06525144 0.0549119 ]
#2 btc wi dxy
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['DXY' ,'DXY_1' ,'DXY_2','DXY_3', 'DXY_4' ,'DXY_5' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.191
Model: OLS Adj. R-squared (uncentered): 0.164
Method: Least Squares F-statistic: 6.944
Date: Fri, 23 Apr 2021 Prob (F-statistic): 1.22e-06
Time: 23:27:07 Log-Likelihood: 57.181
No. Observations: 182 AIC: -102.4
Df Residuals: 176 BIC: -83.14
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -6.3593 1.338 -4.753 0.000 -9.000 -3.719
DXY_1 2.6241 1.399 1.876 0.062 -0.137 5.385
DXY_2 -0.6204 1.386 -0.448 0.655 -3.355 2.114
DXY_3 -1.1768 1.386 -0.849 0.397 -3.912 1.558
DXY_4 -2.9767 1.384 -2.150 0.033 -5.709 -0.245
DXY_5 -0.1493 1.323 -0.113 0.910 -2.760 2.461
==============================================================================
Omnibus: 55.175 Durbin-Watson: 2.623
Prob(Omnibus): 0.000 Jarque-Bera (JB): 657.440
Skew: -0.687 Prob(JB): 1.73e-143
Kurtosis: 12.209 Cond. No. 1.74
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[[
# 'USDT','USDT_1', 'USDT_2', 'USDT_3', 'USDT_4', 'USDT_5',
'GOLD','GOLD_1', 'GOLD_4',
'OIL','OIL_1', 'OIL_4',
'US_10Y','US_10Y_1', 'US_10Y_4',
'US_2Y','US_2Y_1', 'US_2Y_4',
'US_3M', 'US_3M_1', 'US_3M_4' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.466
Model: OLS Adj. R-squared (uncentered): 0.418
Method: Least Squares F-statistic: 9.726
Date: Fri, 23 Apr 2021 Prob (F-statistic): 2.88e-16
Time: 23:27:13 Log-Likelihood: 94.980
No. Observations: 182 AIC: -160.0
Df Residuals: 167 BIC: -111.9
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 0.5091 0.449 1.135 0.258 -0.377 1.395
GOLD_1 -0.3351 0.468 -0.716 0.475 -1.260 0.590
GOLD_4 -0.1417 0.435 -0.326 0.745 -1.000 0.716
OIL 1.3767 0.163 8.467 0.000 1.056 1.698
OIL_1 -0.2991 0.177 -1.688 0.093 -0.649 0.051
OIL_4 0.2136 0.156 1.373 0.172 -0.094 0.521
US_10Y 0.3242 0.235 1.378 0.170 -0.140 0.789
US_10Y_1 0.0824 0.229 0.360 0.719 -0.370 0.535
US_10Y_4 0.1043 0.224 0.466 0.642 -0.338 0.546
US_2Y -0.3533 0.207 -1.706 0.090 -0.762 0.056
US_2Y_1 0.1651 0.208 0.794 0.429 -0.246 0.576
US_2Y_4 -0.0359 0.208 -0.173 0.863 -0.446 0.374
US_3M -0.1877 0.075 -2.507 0.013 -0.336 -0.040
US_3M_1 0.0724 0.070 1.038 0.301 -0.065 0.210
US_3M_4 -0.1717 0.067 -2.564 0.011 -0.304 -0.039
==============================================================================
Omnibus: 24.220 Durbin-Watson: 2.387
Prob(Omnibus): 0.000 Jarque-Bera (JB): 103.982
Skew: -0.295 Prob(JB): 2.63e-23
Kurtosis: 6.656 Cond. No. 14.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['DXY',
'GOLD',
'OIL',
'US_10Y',
'US_2Y',
'US_3M',
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.385
Model: OLS Adj. R-squared (uncentered): 0.364
Method: Least Squares F-statistic: 18.33
Date: Fri, 23 Apr 2021 Prob (F-statistic): 1.73e-16
Time: 23:28:28 Log-Likelihood: 82.016
No. Observations: 182 AIC: -152.0
Df Residuals: 176 BIC: -132.8
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -3.2859 1.251 -2.626 0.009 -5.755 -0.817
GOLD 0.3528 0.455 0.776 0.439 -0.545 1.250
OIL 1.1612 0.162 7.176 0.000 0.842 1.481
US_10Y 0.1331 0.208 0.639 0.523 -0.278 0.544
US_2Y -0.2352 0.181 -1.302 0.195 -0.592 0.121
US_3M -0.1816 0.054 -3.341 0.001 -0.289 -0.074
==============================================================================
Omnibus: 17.732 Durbin-Watson: 2.249
Prob(Omnibus): 0.000 Jarque-Bera (JB): 55.053
Skew: -0.238 Prob(JB): 1.11e-12
Kurtosis: 5.652 Cond. No. 27.4
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#4 with other cryto
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[[
'BTC_1','BTC_4',
'ETH','ETH_1','ETH_4'
# ,
# 'USDT', 'USDT_1', 'USDT_4'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.728
Model: OLS Adj. R-squared (uncentered): 0.720
Method: Least Squares F-statistic: 94.77
Date: Fri, 23 Apr 2021 Prob (F-statistic): 3.58e-48
Time: 23:27:24 Log-Likelihood: 156.34
No. Observations: 182 AIC: -302.7
Df Residuals: 177 BIC: -286.7
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
BTC_1 -0.2114 0.074 -2.853 0.005 -0.358 -0.065
BTC_4 -0.0537 0.074 -0.728 0.468 -0.199 0.092
ETH 0.6257 0.032 19.820 0.000 0.563 0.688
ETH_1 0.1353 0.055 2.446 0.015 0.026 0.244
ETH_4 0.0024 0.054 0.044 0.965 -0.105 0.110
==============================================================================
Omnibus: 61.015 Durbin-Watson: 1.953
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1058.486
Skew: 0.676 Prob(JB): 1.42e-230
Kurtosis: 14.737 Cond. No. 4.30
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# no lag
ols_1 = OLS(model_df['ETH'] , model_df[['GOLD','Dow','DXY']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.260
Model: OLS Adj. R-squared (uncentered): 0.249
Method: Least Squares F-statistic: 22.76
Date: Fri, 23 Apr 2021 Prob (F-statistic): 1.14e-12
Time: 23:40:43 Log-Likelihood: 15.095
No. Observations: 197 AIC: -24.19
Df Residuals: 194 BIC: -14.34
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -1.2591 0.605 -2.083 0.039 -2.451 -0.067
Dow 0.9763 0.497 1.964 0.051 -0.004 1.957
DXY -12.0352 1.747 -6.890 0.000 -15.480 -8.590
==============================================================================
Omnibus: 80.948 Durbin-Watson: 2.597
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1718.876
Skew: 0.963 Prob(JB): 0.00
Kurtosis: 17.342 Cond. No. 4.42
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# lag1 2 3 4 5
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['GOLD', 'GOLD_1','GOLD_4' ,
# 'SP500','SP500_1','SP500_4',
'Dow', 'Dow_1' , 'Dow_4' ,
'DXY', 'DXY_1' , 'DXY_4' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.313
Model: OLS Adj. R-squared (uncentered): 0.278
Method: Least Squares F-statistic: 8.767
Date: Fri, 23 Apr 2021 Prob (F-statistic): 8.15e-11
Time: 23:40:47 Log-Likelihood: 18.049
No. Observations: 182 AIC: -18.10
Df Residuals: 173 BIC: 10.74
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -1.4369 0.659 -2.179 0.031 -2.739 -0.135
GOLD_1 -0.5056 0.655 -0.772 0.441 -1.798 0.786
GOLD_4 0.4982 0.615 0.810 0.419 -0.716 1.712
Dow 1.1792 0.516 2.283 0.024 0.160 2.199
Dow_1 0.9297 0.513 1.811 0.072 -0.084 1.943
Dow_4 0.0122 0.514 0.024 0.981 -1.002 1.026
DXY -10.6771 1.909 -5.592 0.000 -14.446 -6.908
DXY_1 5.4405 1.913 2.843 0.005 1.664 9.217
DXY_4 -2.7612 1.811 -1.524 0.129 -6.336 0.814
==============================================================================
Omnibus: 65.632 Durbin-Watson: 2.613
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1117.897
Skew: 0.796 Prob(JB): 1.79e-243
Kurtosis: 15.037 Cond. No. 6.37
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 ETH w stock
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['Dow','Dow_1', 'Dow_4',
'SP500','SP500_1', 'SP500_4',
'FTSE100','FTSE100_1' ,'FTSE100_4',
'SPASX200','SPASX200_1', 'SPASX200_4',
'SPTSX','SPTSX_1','SPTSX_4',
'CAC40','CAC40_1', 'CAC40_4',
'DAX','DAX_1', 'DAX_4',
'HS','HS_1', 'HS_4',
'KOSPI','KOSPI_1', 'KOSPI_4',
'SMI','SMI_1', 'SMI_4',
'NIKKEI','NIKKEI_1','NIKKEI_4',
'NFTY_1','NFTY_1', 'NFTY_4',
'SET','SET_1', 'SET_4']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.733
Model: OLS Adj. R-squared (uncentered): 0.663
Method: Least Squares F-statistic: 10.41
Date: Fri, 23 Apr 2021 Prob (F-statistic): 7.66e-26
Time: 23:41:15 Log-Likelihood: 104.07
No. Observations: 182 AIC: -132.1
Df Residuals: 144 BIC: -10.39
Df Model: 38
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow -1.4881 1.971 -0.755 0.451 -5.383 2.407
Dow_1 1.5368 1.956 0.786 0.433 -2.329 5.403
Dow_4 -5.0203 1.926 -2.607 0.010 -8.827 -1.214
SP500 0.0018 2.139 0.001 0.999 -4.226 4.229
SP500_1 -0.4613 2.057 -0.224 0.823 -4.527 3.605
SP500_4 1.3156 2.079 0.633 0.528 -2.794 5.425
FTSE100 3.6323 1.126 3.226 0.002 1.407 5.857
FTSE100_1 1.0391 1.126 0.923 0.358 -1.187 3.265
FTSE100_4 -0.0380 1.071 -0.036 0.972 -2.155 2.079
SPASX200 1.0039 1.046 0.960 0.339 -1.064 3.072
SPASX200_1 -0.6127 1.011 -0.606 0.545 -2.610 1.385
SPASX200_4 -1.2586 0.891 -1.413 0.160 -3.020 0.502
SPTSX 2.5370 1.375 1.846 0.067 -0.180 5.254
SPTSX_1 -1.4559 1.404 -1.037 0.301 -4.230 1.318
SPTSX_4 3.4138 1.348 2.533 0.012 0.750 6.077
CAC40 2.0670 1.286 1.607 0.110 -0.475 4.609
CAC40_1 -3.9171 1.268 -3.090 0.002 -6.422 -1.412
CAC40_4 1.0113 1.209 0.836 0.404 -1.379 3.402
DAX -3.0607 1.221 -2.507 0.013 -5.474 -0.648
DAX_1 2.4141 1.194 2.022 0.045 0.055 4.773
DAX_4 -1.2350 1.199 -1.030 0.305 -3.605 1.135
HS 2.7303 0.654 4.173 0.000 1.437 4.024
HS_1 0.4533 0.629 0.721 0.472 -0.790 1.696
HS_4 0.4231 0.643 0.658 0.512 -0.849 1.695
KOSPI 0.0534 0.920 0.058 0.954 -1.766 1.872
KOSPI_1 -0.3397 0.929 -0.366 0.715 -2.176 1.496
KOSPI_4 0.4667 0.983 0.475 0.636 -1.477 2.410
SMI -1.3120 1.062 -1.236 0.219 -3.410 0.786
SMI_1 1.0656 1.104 0.965 0.336 -1.116 3.247
SMI_4 -1.2256 1.031 -1.189 0.237 -3.264 0.813
NIKKEI -2.5208 0.870 -2.898 0.004 -4.240 -0.802
NIKKEI_1 0.1739 0.850 0.205 0.838 -1.506 1.854
NIKKEI_4 2.0131 0.832 2.420 0.017 0.369 3.657
NFTY_1 0.2358 0.374 0.631 0.529 -0.503 0.974
NFTY_1 0.2358 0.374 0.631 0.529 -0.503 0.974
NFTY_4 0.8320 0.696 1.195 0.234 -0.544 2.208
SET 0.2978 0.767 0.388 0.698 -1.218 1.814
SET_1 -0.3313 0.816 -0.406 0.685 -1.943 1.281
SET_4 -0.4113 0.794 -0.518 0.605 -1.980 1.157
==============================================================================
Omnibus: 4.675 Durbin-Watson: 2.079
Prob(Omnibus): 0.097 Jarque-Bera (JB): 5.310
Skew: -0.182 Prob(JB): 0.0703
Kurtosis: 3.753 Cond. No. 2.25e+15
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The smallest eigenvalue is 5.11e-31. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
#2 btc w stock
ols_1 = OLS(model_df_lag['ETH'] , x_new[:,0:3] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.591
Model: OLS Adj. R-squared (uncentered): 0.584
Method: Least Squares F-statistic: 86.32
Date: Fri, 23 Apr 2021 Prob (F-statistic): 1.38e-34
Time: 23:41:22 Log-Likelihood: 65.275
No. Observations: 182 AIC: -124.5
Df Residuals: 179 BIC: -114.9
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 0.0258 0.004 6.392 0.000 0.018 0.034
x2 0.0319 0.013 2.411 0.017 0.006 0.058
x3 0.2100 0.014 14.570 0.000 0.182 0.238
==============================================================================
Omnibus: 6.495 Durbin-Watson: 2.172
Prob(Omnibus): 0.039 Jarque-Bera (JB): 10.437
Skew: 0.072 Prob(JB): 0.00542
Kurtosis: 4.164 Cond. No. 3.57
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 ETH wi dxy
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['DXY' ,'DXY_1' ,'DXY_2','DXY_3', 'DXY_4' ,'DXY_5' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.275
Model: OLS Adj. R-squared (uncentered): 0.250
Method: Least Squares F-statistic: 11.12
Date: Fri, 23 Apr 2021 Prob (F-statistic): 1.66e-10
Time: 23:40:52 Log-Likelihood: 13.113
No. Observations: 182 AIC: -14.23
Df Residuals: 176 BIC: 4.999
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -10.7423 1.704 -6.303 0.000 -14.106 -7.379
DXY_1 4.2066 1.782 2.360 0.019 0.689 7.724
DXY_2 -1.0606 1.765 -0.601 0.549 -4.545 2.423
DXY_3 -1.5943 1.765 -0.903 0.368 -5.078 1.890
DXY_4 -3.0962 1.764 -1.756 0.081 -6.577 0.384
DXY_5 0.1058 1.685 0.063 0.950 -3.220 3.432
==============================================================================
Omnibus: 73.659 Durbin-Watson: 2.629
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1713.536
Skew: 0.865 Prob(JB): 0.00
Kurtosis: 17.932 Cond. No. 1.74
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[[
# 'USDT','USDT_1', 'USDT_2', 'USDT_3', 'USDT_4', 'USDT_5',
'GOLD','GOLD_1', 'GOLD_4',
'OIL','OIL_1', 'OIL_4',
'US_10Y','US_10Y_1', 'US_10Y_4',
'US_2Y','US_2Y_1', 'US_2Y_4',
'US_3M', 'US_3M_1', 'US_3M_4' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.348
Model: OLS Adj. R-squared (uncentered): 0.290
Method: Least Squares F-statistic: 5.947
Date: Fri, 23 Apr 2021 Prob (F-statistic): 8.10e-10
Time: 23:41:30 Log-Likelihood: 22.804
No. Observations: 182 AIC: -15.61
Df Residuals: 167 BIC: 32.45
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -0.1132 0.667 -0.170 0.865 -1.430 1.204
GOLD_1 0.0322 0.696 0.046 0.963 -1.343 1.407
GOLD_4 -0.0188 0.646 -0.029 0.977 -1.295 1.257
OIL 1.6478 0.242 6.816 0.000 1.171 2.125
OIL_1 -0.4699 0.264 -1.783 0.076 -0.990 0.050
OIL_4 0.0736 0.231 0.318 0.751 -0.383 0.530
US_10Y 0.5720 0.350 1.635 0.104 -0.119 1.263
US_10Y_1 -0.2059 0.341 -0.605 0.546 -0.878 0.467
US_10Y_4 0.1565 0.333 0.470 0.639 -0.500 0.813
US_2Y -0.4073 0.308 -1.323 0.188 -1.015 0.200
US_2Y_1 0.4919 0.309 1.590 0.114 -0.119 1.103
US_2Y_4 -0.0092 0.309 -0.030 0.976 -0.619 0.601
US_3M -0.2290 0.111 -2.057 0.041 -0.449 -0.009
US_3M_1 0.0794 0.104 0.765 0.445 -0.125 0.284
US_3M_4 -0.2620 0.100 -2.631 0.009 -0.459 -0.065
==============================================================================
Omnibus: 58.860 Durbin-Watson: 2.554
Prob(Omnibus): 0.000 Jarque-Bera (JB): 802.760
Skew: 0.722 Prob(JB): 4.82e-175
Kurtosis: 13.187 Cond. No. 14.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['DXY',
'GOLD',
'OIL',
'US_10Y',
'US_2Y',
'US_3M',
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.355
Model: OLS Adj. R-squared (uncentered): 0.333
Method: Least Squares F-statistic: 16.16
Date: Fri, 23 Apr 2021 Prob (F-statistic): 8.85e-15
Time: 23:42:00 Log-Likelihood: 23.796
No. Observations: 182 AIC: -35.59
Df Residuals: 176 BIC: -16.37
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -9.9977 1.723 -5.803 0.000 -13.398 -6.597
GOLD -1.0300 0.626 -1.645 0.102 -2.266 0.206
OIL 1.1347 0.223 5.092 0.000 0.695 1.574
US_10Y 0.2850 0.287 0.994 0.322 -0.281 0.851
US_2Y -0.2108 0.249 -0.847 0.398 -0.702 0.280
US_3M -0.1836 0.075 -2.454 0.015 -0.331 -0.036
==============================================================================
Omnibus: 53.473 Durbin-Watson: 2.326
Prob(Omnibus): 0.000 Jarque-Bera (JB): 403.711
Skew: 0.826 Prob(JB): 2.16e-88
Kurtosis: 10.107 Cond. No. 27.4
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#4 with other cryto
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[[
'ETH_1','ETH_4',
'BTC','BTC_1','BTC_4',
# 'USDT', 'USDT_1', 'USDT_4'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.735
Model: OLS Adj. R-squared (uncentered): 0.728
Method: Least Squares F-statistic: 98.38
Date: Fri, 23 Apr 2021 Prob (F-statistic): 3.22e-49
Time: 23:42:11 Log-Likelihood: 104.84
No. Observations: 182 AIC: -199.7
Df Residuals: 177 BIC: -183.7
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
ETH_1 -0.2311 0.073 -3.185 0.002 -0.374 -0.088
ETH_4 0.0509 0.072 0.705 0.481 -0.092 0.193
BTC 1.1019 0.056 19.820 0.000 0.992 1.212
BTC_1 0.1842 0.100 1.850 0.066 -0.012 0.381
BTC_4 0.0004 0.098 0.004 0.997 -0.193 0.194
==============================================================================
Omnibus: 44.853 Durbin-Watson: 1.964
Prob(Omnibus): 0.000 Jarque-Bera (JB): 599.015
Skew: 0.341 Prob(JB): 8.43e-131
Kurtosis: 11.861 Cond. No. 4.12
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# no lag
ols_1 = OLS(model_df['USDT'] , model_df[['GOLD','Dow','DXY']] )
results_1 = ols_1.fit()
print(results_1.summary())
# lag1 2 3 4 5
ols_1 = OLS(model_df_lag['USDT'] , model_df_lag[['GOLD', 'GOLD_1','GOLD_4' ,
# 'SP500','SP500_1','SP500_4',
'Dow', 'Dow_1' , 'Dow_4' ,
'DXY', 'DXY_1' , 'DXY_4' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: USDT R-squared (uncentered): 0.042
Model: OLS Adj. R-squared (uncentered): -0.004
Method: Least Squares F-statistic: 0.9230
Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.506
Time: 15:21:35 Log-Likelihood: 705.02
No. Observations: 198 AIC: -1392.
Df Residuals: 189 BIC: -1362.
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -0.0223 0.020 -1.102 0.272 -0.062 0.018
GOLD_1 -0.0193 0.020 -0.961 0.338 -0.059 0.020
GOLD_4 0.0141 0.019 0.740 0.460 -0.023 0.052
Dow -0.0024 0.016 -0.152 0.879 -0.033 0.029
Dow_1 0.0148 0.016 0.951 0.343 -0.016 0.046
Dow_4 -0.0106 0.015 -0.689 0.492 -0.041 0.020
DXY -0.1067 0.058 -1.834 0.068 -0.221 0.008
DXY_1 0.0540 0.058 0.926 0.356 -0.061 0.169
DXY_4 -0.0187 0.055 -0.343 0.732 -0.127 0.089
==============================================================================
Omnibus: 32.231 Durbin-Watson: 2.709
Prob(Omnibus): 0.000 Jarque-Bera (JB): 250.641
Skew: 0.106 Prob(JB): 3.75e-55
Kurtosis: 8.508 Cond. No. 6.18
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 USDT w stock
ols_1 = OLS(model_df_lag['USDT'] , model_df_lag[['Dow','Dow_1', 'Dow_4',
'SP500','SP500_1', 'SP500_4',
'FTSE100','FTSE100_1' ,'FTSE100_4',
'SPASX200','SPASX200_1', 'SPASX200_4',
'SPTSX','SPTSX_1','SPTSX_4',
'CAC40','CAC40_1', 'CAC40_4',
'DAX','DAX_1', 'DAX_4',
'HS','HS_1', 'HS_4',
'KOSPI','KOSPI_1', 'KOSPI_4',
'SMI','SMI_1', 'SMI_4',
'NIKKEI','NIKKEI_1','NIKKEI_4',
'NFTY_1','NFTY_1', 'NFTY_4',
'SET','SET_1', 'SET_4']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: USDT R-squared (uncentered): 0.168
Model: OLS Adj. R-squared (uncentered): -0.030
Method: Least Squares F-statistic: 0.8487
Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.718
Time: 15:21:41 Log-Likelihood: 718.94
No. Observations: 198 AIC: -1362.
Df Residuals: 160 BIC: -1237.
Df Model: 38
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow 0.0840 0.087 0.964 0.337 -0.088 0.256
Dow_1 -0.0467 0.086 -0.540 0.590 -0.218 0.124
Dow_4 -0.1340 0.085 -1.580 0.116 -0.302 0.033
SP500 -0.0435 0.094 -0.462 0.644 -0.230 0.142
SP500_1 0.0544 0.091 0.595 0.553 -0.126 0.235
SP500_4 0.0516 0.092 0.562 0.575 -0.130 0.233
FTSE100 0.0743 0.050 1.479 0.141 -0.025 0.174
FTSE100_1 0.0147 0.050 0.294 0.769 -0.084 0.114
FTSE100_4 -0.0633 0.049 -1.303 0.194 -0.159 0.033
SPASX200 0.0846 0.048 1.773 0.078 -0.010 0.179
SPASX200_1 -0.0127 0.044 -0.287 0.774 -0.100 0.074
SPASX200_4 -0.0107 0.040 -0.270 0.787 -0.089 0.068
SPTSX -0.0513 0.061 -0.843 0.401 -0.172 0.069
SPTSX_1 -0.0176 0.062 -0.284 0.776 -0.140 0.105
SPTSX_4 0.0819 0.060 1.375 0.171 -0.036 0.200
CAC40 -0.0188 0.058 -0.325 0.745 -0.133 0.096
CAC40_1 0.0235 0.056 0.417 0.677 -0.088 0.135
CAC40_4 0.0356 0.055 0.648 0.518 -0.073 0.144
DAX -0.0660 0.055 -1.199 0.232 -0.175 0.043
DAX_1 -0.0321 0.054 -0.592 0.555 -0.139 0.075
DAX_4 -0.0361 0.055 -0.661 0.510 -0.144 0.072
HS 0.0210 0.029 0.723 0.471 -0.036 0.078
HS_1 -0.0087 0.028 -0.308 0.759 -0.064 0.047
HS_4 -0.0097 0.028 -0.340 0.734 -0.066 0.046
KOSPI -0.0643 0.040 -1.588 0.114 -0.144 0.016
KOSPI_1 0.0355 0.041 0.860 0.391 -0.046 0.117
KOSPI_4 0.0268 0.043 0.615 0.539 -0.059 0.113
SMI -0.0073 0.048 -0.152 0.880 -0.102 0.088
SMI_1 0.0426 0.050 0.848 0.398 -0.057 0.142
SMI_4 -0.0067 0.047 -0.144 0.885 -0.099 0.086
NIKKEI -0.0018 0.039 -0.046 0.963 -0.078 0.075
NIKKEI_1 -0.0612 0.037 -1.647 0.101 -0.135 0.012
NIKKEI_4 0.0793 0.037 2.141 0.034 0.006 0.152
NFTY_1 0.0046 0.016 0.291 0.771 -0.027 0.036
NFTY_1 0.0046 0.016 0.291 0.771 -0.027 0.036
NFTY_4 -0.0389 0.030 -1.316 0.190 -0.097 0.019
SET 0.0342 0.035 0.989 0.324 -0.034 0.102
SET_1 -0.0232 0.037 -0.625 0.533 -0.096 0.050
SET_4 0.0349 0.036 0.977 0.330 -0.036 0.106
==============================================================================
Omnibus: 32.793 Durbin-Watson: 2.690
Prob(Omnibus): 0.000 Jarque-Bera (JB): 256.329
Skew: 0.142 Prob(JB): 2.18e-56
Kurtosis: 8.567 Cond. No. 1.26e+16
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The smallest eigenvalue is 1.61e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
#2 USDT wi dxy
ols_1 = OLS(model_df_lag['USDT'] , model_df_lag[['DXY' ,'DXY_1' ,'DXY_2','DXY_3', 'DXY_4' ,'DXY_5' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: USDT R-squared (uncentered): 0.038
Model: OLS Adj. R-squared (uncentered): 0.008
Method: Least Squares F-statistic: 1.270
Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.273
Time: 15:21:49 Log-Likelihood: 704.62
No. Observations: 198 AIC: -1397.
Df Residuals: 192 BIC: -1378.
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -0.0708 0.051 -1.390 0.166 -0.171 0.030
DXY_1 0.0725 0.053 1.366 0.173 -0.032 0.177
DXY_2 0.0527 0.053 0.999 0.319 -0.051 0.157
DXY_3 -0.0512 0.053 -0.968 0.334 -0.155 0.053
DXY_4 -0.0340 0.053 -0.644 0.521 -0.138 0.070
DXY_5 -0.0104 0.051 -0.206 0.837 -0.110 0.089
==============================================================================
Omnibus: 32.709 Durbin-Watson: 2.777
Prob(Omnibus): 0.000 Jarque-Bera (JB): 241.947
Skew: 0.196 Prob(JB): 2.90e-53
Kurtosis: 8.401 Cond. No. 1.71
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['USDT'] , model_df_lag[[
# 'USDT','USDT_1', 'USDT_2', 'USDT_3', 'USDT_4', 'USDT_5',
'GOLD','GOLD_1', 'GOLD_4',
'OIL','OIL_1', 'OIL_4',
'US_10Y','US_10Y_1', 'US_10Y_4',
'US_2Y','US_2Y_1', 'US_2Y_4',
'US_3M', 'US_3M_1', 'US_3M_4' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: USDT R-squared (uncentered): 0.045
Model: OLS Adj. R-squared (uncentered): -0.033
Method: Least Squares F-statistic: 0.5767
Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.890
Time: 15:21:53 Log-Likelihood: 705.33
No. Observations: 198 AIC: -1381.
Df Residuals: 183 BIC: -1331.
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -0.0162 0.021 -0.776 0.439 -0.057 0.025
GOLD_1 -0.0171 0.022 -0.793 0.429 -0.060 0.025
GOLD_4 0.0282 0.020 1.401 0.163 -0.012 0.068
OIL 0.0107 0.007 1.459 0.146 -0.004 0.025
OIL_1 -0.0024 0.008 -0.309 0.758 -0.018 0.013
OIL_4 -0.0082 0.007 -1.139 0.256 -0.022 0.006
US_10Y 0.0041 0.011 0.388 0.698 -0.017 0.025
US_10Y_1 -0.0066 0.010 -0.656 0.513 -0.026 0.013
US_10Y_4 0.0027 0.010 0.258 0.797 -0.018 0.023
US_2Y -0.0062 0.009 -0.689 0.492 -0.024 0.012
US_2Y_1 0.0041 0.009 0.476 0.635 -0.013 0.021
US_2Y_4 0.0095 0.009 1.017 0.311 -0.009 0.028
US_3M 0.0015 0.003 0.496 0.621 -0.004 0.008
US_3M_1 -0.0025 0.003 -0.835 0.405 -0.008 0.003
US_3M_4 -0.0015 0.003 -0.518 0.605 -0.007 0.004
==============================================================================
Omnibus: 30.561 Durbin-Watson: 2.686
Prob(Omnibus): 0.000 Jarque-Bera (JB): 214.571
Skew: 0.126 Prob(JB): 2.55e-47
Kurtosis: 8.094 Cond. No. 14.2
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['USDT'] , model_df_lag[[
'GOLD', 'GOLD_1','GOLD_4',
'OIL', 'OIL_1','OIL_4',
'US_10Y','US_10Y_1','US_10Y_4',
'US_2Y','US_2Y_1','US_2Y_4',
'US_3M', 'US_3M_1' ,'US_3M_4'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: USDT R-squared (uncentered): 0.045
Model: OLS Adj. R-squared (uncentered): -0.033
Method: Least Squares F-statistic: 0.5767
Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.890
Time: 15:21:58 Log-Likelihood: 705.33
No. Observations: 198 AIC: -1381.
Df Residuals: 183 BIC: -1331.
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD -0.0162 0.021 -0.776 0.439 -0.057 0.025
GOLD_1 -0.0171 0.022 -0.793 0.429 -0.060 0.025
GOLD_4 0.0282 0.020 1.401 0.163 -0.012 0.068
OIL 0.0107 0.007 1.459 0.146 -0.004 0.025
OIL_1 -0.0024 0.008 -0.309 0.758 -0.018 0.013
OIL_4 -0.0082 0.007 -1.139 0.256 -0.022 0.006
US_10Y 0.0041 0.011 0.388 0.698 -0.017 0.025
US_10Y_1 -0.0066 0.010 -0.656 0.513 -0.026 0.013
US_10Y_4 0.0027 0.010 0.258 0.797 -0.018 0.023
US_2Y -0.0062 0.009 -0.689 0.492 -0.024 0.012
US_2Y_1 0.0041 0.009 0.476 0.635 -0.013 0.021
US_2Y_4 0.0095 0.009 1.017 0.311 -0.009 0.028
US_3M 0.0015 0.003 0.496 0.621 -0.004 0.008
US_3M_1 -0.0025 0.003 -0.835 0.405 -0.008 0.003
US_3M_4 -0.0015 0.003 -0.518 0.605 -0.007 0.004
==============================================================================
Omnibus: 30.561 Durbin-Watson: 2.686
Prob(Omnibus): 0.000 Jarque-Bera (JB): 214.571
Skew: 0.126 Prob(JB): 2.55e-47
Kurtosis: 8.094 Cond. No. 14.2
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#4 with other cryto
ols_1 = OLS(model_df_lag['USDT'] , model_df_lag[[
'BTC','BTC_1','BTC_4',
'ETH' ,'ETH_1','ETH_4',
'USDT_1', 'USDT_4'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: USDT R-squared (uncentered): 0.259
Model: OLS Adj. R-squared (uncentered): 0.227
Method: Least Squares F-statistic: 8.284
Date: Thu, 22 Apr 2021 Prob (F-statistic): 1.30e-09
Time: 16:11:14 Log-Likelihood: 730.38
No. Observations: 198 AIC: -1445.
Df Residuals: 190 BIC: -1418.
Df Model: 8
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
BTC -0.0040 0.004 -0.914 0.362 -0.013 0.005
BTC_1 -0.0004 0.004 -0.097 0.923 -0.009 0.008
BTC_4 -0.0079 0.004 -1.792 0.075 -0.017 0.001
ETH 0.0076 0.003 2.284 0.023 0.001 0.014
ETH_1 0.0059 0.003 1.695 0.092 -0.001 0.013
ETH_4 0.0083 0.003 2.557 0.011 0.002 0.015
USDT_1 -0.3968 0.069 -5.750 0.000 -0.533 -0.261
USDT_4 -0.1700 0.058 -2.920 0.004 -0.285 -0.055
==============================================================================
Omnibus: 31.800 Durbin-Watson: 1.939
Prob(Omnibus): 0.000 Jarque-Bera (JB): 247.157
Skew: -0.058 Prob(JB): 2.14e-54
Kurtosis: 8.472 Cond. No. 58.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# PCA w/o outliner
dff = data_for_analysis_ret.dropna()
dff2 = data_for_analysis_ret.dropna()
for i in ['BTC','ETH','GOLD','US_3M','Dow','DXY']:
dff = dff[ (abs(dff[i]) < abs(np.percentile(dff[i], 99)) ) ]
col_all = ['GOLD', 'OIL', 'US_10Y', 'US_2Y', 'US_3M', 'DXY', 'BTC',
'ETH', 'Dow', 'SP500', 'FTSE100', 'SPASX200', 'SPTSX', 'CAC40',
'DAX','SHAI', 'HS', 'KOSPI', 'SMI', 'NIKKEI', 'NFTY', 'SET']
col_1 = ['BTC','ETH', 'USDT']
col_2 = ['BTC','GOLD','US_3M','Dow','DXY','ETH']
col_3 = ['ETH','GOLD', 'US_3M','Dow','DXY']
col_4 = ['USDT','GOLD', 'US_3M','Dow','DXY']
data_for_analysis_ret.dropna().columns
Index(['par_week', 'GOLD', 'OIL', 'US_10Y', 'US_2Y', 'US_3M', 'DXY', 'BTC',
'ETH', 'Dow', 'SP500', 'FTSE100', 'SPASX200', 'SPTSX', 'SHAI', 'CAC40',
'DAX', 'HS', 'KOSPI', 'SMI', 'NIKKEI', 'NFTY', 'SET'],
dtype='object')
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_1]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
x_new = pca.fit_transform(x)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-34-65a9adee40c6> in <module> 3 from sklearn.preprocessing import StandardScaler 4 ----> 5 x = dff.dropna().iloc[:,1:][col_1] 6 x = StandardScaler().fit_transform(x) 7 ~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) 2906 if is_iterator(key): 2907 key = list(key) -> 2908 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] 2909 2910 # take() does not accept boolean indexers ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) 1253 -> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) 1255 return keyarr, indexer 1256 ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing) 1302 if raise_missing: 1303 not_found = list(set(key) - set(ax)) -> 1304 raise KeyError(f"{not_found} not in index") 1305 1306 # we skip the warning on Categorical KeyError: "['USDT'] not in index"
print(pca.explained_variance_ratio_)
[0.27895412 0.24589623]
print(abs( pca.components_ ))
[[0.0919427 0.73334125 0.11912736 0.26606862 0.60726708] [0.46343902 0.00443008 0.38547413 0.66417546 0.4421372 ]]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 1.15, coeff[i,1] * 1.15, str(col_1[i]), color = 'BLACK', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-101-85719136021c> in <module> 2 mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style 3 # Call the biplot function for only the first 2 PCs ----> 4 biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None") 5 plt.show() <ipython-input-100-166eb86b931d> in biplot(score, coeff, y) 19 #plot as arrows the variable scores (each variable has a score for PC1 and one for PC2) 20 plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2) ---> 21 plt.text(coeff[i,0]* 1.15, coeff[i,1] * 1.15, str(col_1[i]), color = 'BLACK', ha = 'center', va = 'center',fontsize=15) 22 23 plt.xlabel("PC{}".format(1), size=14) IndexError: list index out of range
#2 PCA BTC on ASSET type
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_2]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
[0.31055068 0.22320409 0.17941137]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(2), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,1:3], np.transpose(pca.components_[1:
3, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,[0,2]], np.transpose(pca.components_[[0,2], :]), y = "None")
plt.show()
# PCA w outliner
import sklearn
from sklearn.preprocessing import StandardScaler
x = data_for_analysis_ret.dropna().iloc[:,1:][col_1]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-44-0e07390d9720> in <module> 5 from sklearn.preprocessing import StandardScaler 6 ----> 7 x = data_for_analysis_ret.dropna().iloc[:,1:][col_1] 8 x = StandardScaler().fit_transform(x) 9 ~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) 2906 if is_iterator(key): 2907 key = list(key) -> 2908 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] 2909 2910 # take() does not accept boolean indexers ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) 1253 -> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) 1255 return keyarr, indexer 1256 ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing) 1302 if raise_missing: 1303 not_found = list(set(key) - set(ax)) -> 1304 raise KeyError(f"{not_found} not in index") 1305 1306 # we skip the warning on Categorical KeyError: "['USDT'] not in index"
print(pca.explained_variance_ratio_)
[0.31055068 0.22320409 0.17941137]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 1.15, coeff[i,1] * 1.15, str(col_1[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-47-85719136021c> in <module> 2 mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style 3 # Call the biplot function for only the first 2 PCs ----> 4 biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None") 5 plt.show() <ipython-input-46-48dc61da9584> in biplot(score, coeff, y) 19 #plot as arrows the variable scores (each variable has a score for PC1 and one for PC2) 20 plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2) ---> 21 plt.text(coeff[i,0]* 1.15, coeff[i,1] * 1.15, str(col_1[i]), color = 'k', ha = 'center', va = 'center',fontsize=15) 22 23 plt.xlabel("PC{}".format(1), size=14) IndexError: list index out of range
#2 PCA on asset type
# PCA w outliner
import sklearn
from sklearn.preprocessing import StandardScaler
x = data_for_analysis_ret.dropna().iloc[:,1:][col_2]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
[0.43213452 0.20424465 0.15773532]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
# 2 BTC with all data outliner
import sklearn
from sklearn.preprocessing import StandardScaler
x = data_for_analysis_ret.dropna().iloc[:,1:][col_2]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
[0.43213452 0.20424465 0.15773532]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(2), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,1:3], np.transpose(pca.components_[1:3, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,[0,2]], np.transpose(pca.components_[[0,2], :]), y = "None")
plt.show()
#3 PCA ETH on ASSET type
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_3]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
x_new = pca.fit_transform(x)
col_3
['ETH', 'GOLD', 'US_3M', 'Dow', 'DXY']
print(pca.explained_variance_ratio_)
[0.27621732 0.250375 ]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_3[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
# 3 ETH with WITH ALL data outlier
import sklearn
from sklearn.preprocessing import StandardScaler
x = data_for_analysis_ret.dropna().iloc[:,1:][col_3]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
[0.40252062 0.2197299 ]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_3[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
#4 PCA ETH on ASSET type no outlier
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_4]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
x_new = pca.fit_transform(x)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-69-180f1c1b6be1> in <module> 5 from sklearn.preprocessing import StandardScaler 6 ----> 7 x = dff.dropna().iloc[:,1:][col_4] 8 x = StandardScaler().fit_transform(x) 9 ~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) 2906 if is_iterator(key): 2907 key = list(key) -> 2908 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] 2909 2910 # take() does not accept boolean indexers ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) 1253 -> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) 1255 return keyarr, indexer 1256 ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing) 1302 if raise_missing: 1303 not_found = list(set(key) - set(ax)) -> 1304 raise KeyError(f"{not_found} not in index") 1305 1306 # we skip the warning on Categorical KeyError: "['USDT'] not in index"
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_4[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
#4 PCA ETH on ASSET type WITH ALL data outlier
import sklearn
from sklearn.preprocessing import StandardScaler
x = data_for_analysis_ret.dropna().iloc[:,1:][col_4]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
x_new = pca.fit_transform(x)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-72-8df5d912000b> in <module> 5 from sklearn.preprocessing import StandardScaler 6 ----> 7 x = data_for_analysis_ret.dropna().iloc[:,1:][col_4] 8 x = StandardScaler().fit_transform(x) 9 ~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) 2906 if is_iterator(key): 2907 key = list(key) -> 2908 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] 2909 2910 # take() does not accept boolean indexers ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) 1253 -> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) 1255 return keyarr, indexer 1256 ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing) 1302 if raise_missing: 1303 not_found = list(set(key) - set(ax)) -> 1304 raise KeyError(f"{not_found} not in index") 1305 1306 # we skip the warning on Categorical KeyError: "['USDT'] not in index"
print(pca.explained_variance_ratio_)
[0.40252062 0.2197299 ]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_4[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
#EXTRA with completely no outlier 80%
#4 PCA ETH on ASSET type
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_4]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
x_new = pca.fit_transform(x)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-76-07a48dfdd3ad> in <module> 7 from sklearn.preprocessing import StandardScaler 8 ----> 9 x = dff.dropna().iloc[:,1:][col_4] 10 x = StandardScaler().fit_transform(x) 11 ~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) 2906 if is_iterator(key): 2907 key = list(key) -> 2908 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] 2909 2910 # take() does not accept boolean indexers ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing) 1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) 1253 -> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) 1255 return keyarr, indexer 1256 ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing) 1302 if raise_missing: 1303 not_found = list(set(key) - set(ax)) -> 1304 raise KeyError(f"{not_found} not in index") 1305 1306 # we skip the warning on Categorical KeyError: "['USDT'] not in index"
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_4[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
#monthly data
for index, value in enumerate(df_list):
agg_list_p = ["last"]
agg_list_r = ["std","mean"]
close_col = "Close"
ret_col = "return"
# exec("{x} = {y}.groupby(['par_month']).agg({close_col:agg_list_p , ret_col:agg_list_r}).reset_index() ".format(y = df_list[index], x = grp_list[index]))
exec("{x} = {y}.groupby(['par_month'])[['Close']].last().reset_index()".format(y = df_list[index], x = grp_list[index]))
name_p = P_list[index]
name_agg = ["par_month"]
for i in agg_list_p:
name_agg.append(name_p + "_" + i)
# name_r = R_list[index]
# for i in agg_list_r:
# name_agg.append(name_r + "_" + i)
exec("{x}.columns = name_agg".format(x =grp_list[index] ))
prep_merge_data = Alter_GOLD_grp
for index, value in enumerate(grp_list[1:]):
exec('prep_merge_data= pd.merge(prep_merge_data , {x} , how = "left", on = ["par_month"]) '.format(x= grp_list[1:][index]))
prep_merge_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 376 entries, 0 to 375 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 par_month 376 non-null int64 1 GOLD_P_last 376 non-null float64 2 OIL_P_last 376 non-null float64 3 US_10Y_P_last 376 non-null float64 4 US_2Y_P_last 375 non-null float64 5 US_3M_P_last 376 non-null float64 6 DXY_P_last 376 non-null float64 7 BTC_P_last 130 non-null float64 8 ETH_P_last 62 non-null float64 9 USDT_P_last 49 non-null float64 10 Dow_P_last 172 non-null float64 11 SP500_P_last 184 non-null float64 12 FTSE100_P_last 244 non-null float64 13 SPASX200_P_last 347 non-null float64 14 SPTSX_P_last 376 non-null float64 15 SHAI_P_last 365 non-null float64 16 CAC40_P_last 376 non-null float64 17 DAX_P_last 244 non-null float64 18 HS_P_last 245 non-null float64 19 KOSPI_P_last 371 non-null float64 20 SMI_P_last 376 non-null float64 21 NIKKEI_P_last 244 non-null float64 22 NFTY_P_last 306 non-null float64 23 SET_P_last 376 non-null float64 dtypes: float64(23), int64(1) memory usage: 73.4 KB
prep_merge_data.columns
Index(['par_month', 'GOLD_P_last', 'OIL_P_last', 'US_10Y_P_last',
'US_2Y_P_last', 'US_3M_P_last', 'DXY_P_last', 'BTC_P_last',
'ETH_P_last', 'USDT_P_last', 'Dow_P_last', 'SP500_P_last',
'FTSE100_P_last', 'SPASX200_P_last', 'SPTSX_P_last', 'SHAI_P_last',
'CAC40_P_last', 'DAX_P_last', 'HS_P_last', 'KOSPI_P_last', 'SMI_P_last',
'NIKKEI_P_last', 'NFTY_P_last', 'SET_P_last'],
dtype='object')
# monthly data plot
prep_merge_data_201715 = prep_merge_data[prep_merge_data['par_month'] > 201602][prep_merge_data['par_month'] < 202150]
<ipython-input-72-ceb0b21e8bff>:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index. prep_merge_data_201715 = prep_merge_data[prep_merge_data['par_month'] > 201602][prep_merge_data['par_month'] < 202150]
prep_merge_data_201715
| par_month | GOLD_P_last | OIL_P_last | US_10Y_P_last | US_2Y_P_last | US_3M_P_last | DXY_P_last | BTC_P_last | ETH_P_last | USDT_P_last | ... | SPTSX_P_last | SHAI_P_last | CAC40_P_last | DAX_P_last | HS_P_last | KOSPI_P_last | SMI_P_last | NIKKEI_P_last | NFTY_P_last | SET_P_last | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 314 | 201603 | 1280.90 | 38.34 | 1.770 | 0.7250 | 0.213 | 94.59 | 415.7 | 11.41 | NaN | ... | 13494.36 | 3003.92 | 4385.06 | 9965.51 | 20776.70 | 1995.85 | 7807.89 | 16758.67 | 7738.40 | 1407.70 |
| 315 | 201604 | 1341.40 | 45.92 | 1.835 | 0.7820 | 0.205 | 93.08 | 448.5 | 8.87 | NaN | ... | 13951.45 | 2938.32 | 4428.96 | 10038.97 | 21067.05 | 1994.15 | 7960.85 | 16666.05 | 7849.80 | 1404.61 |
| 316 | 201605 | 1269.30 | 49.10 | 1.851 | 0.8830 | 0.302 | 95.89 | 528.9 | 14.01 | NaN | ... | 14065.78 | 2916.62 | 4505.62 | 10262.74 | 20815.09 | 1983.40 | 8216.42 | 17234.98 | 8160.10 | 1424.28 |
| 317 | 201606 | 1375.00 | 48.33 | 1.475 | 0.5860 | 0.266 | 96.14 | 670.0 | 12.50 | NaN | ... | 14064.54 | 2929.61 | 4237.48 | 9680.09 | 20794.37 | 1970.35 | 8020.15 | 15575.92 | 8287.75 | 1444.99 |
| 318 | 201607 | 1415.70 | 41.60 | 1.463 | 0.6632 | 0.264 | 95.53 | 621.9 | 11.86 | NaN | ... | 14582.74 | 2979.34 | 4439.81 | 10337.50 | 21891.37 | 2016.19 | 8127.20 | 16569.27 | 8638.50 | 1524.07 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 371 | 202012 | 1895.10 | 48.52 | 0.916 | 0.1230 | 0.079 | 89.94 | 28949.4 | 735.94 | 1.0005 | ... | 17433.36 | 3473.07 | 5551.41 | 13718.78 | 27231.13 | 2873.47 | 10703.51 | 27444.17 | 13981.75 | 1449.35 |
| 372 | 202101 | 1850.30 | 52.20 | 1.071 | 0.1093 | 0.063 | 90.58 | 33108.1 | 1312.73 | 1.0009 | ... | 17337.02 | 3483.07 | 5399.21 | 13432.87 | 28283.71 | 2976.21 | 10591.06 | 27663.39 | 13634.60 | 1466.98 |
| 373 | 202102 | 1728.80 | 61.50 | 1.395 | 0.1250 | 0.041 | 90.88 | 45164.0 | 1418.76 | 1.0024 | ... | 18060.26 | 3509.08 | 5703.22 | 13786.29 | 28980.21 | 3012.95 | 10522.22 | 28966.01 | 14529.15 | 1496.78 |
| 374 | 202103 | 1713.80 | 59.16 | 1.744 | 0.1622 | 0.023 | 93.23 | 58763.7 | 1917.99 | 1.0002 | ... | 18700.67 | 3441.91 | 6067.23 | 15008.34 | 28378.35 | 3061.42 | 11047.37 | 29178.80 | 14690.70 | 1587.21 |
| 375 | 202104 | 1768.45 | 63.19 | 1.638 | 0.1798 | 0.025 | 91.00 | 54453.4 | 2606.53 | 1.0005 | ... | 19175.09 | 3439.79 | 6273.76 | 15249.27 | 28961.00 | 3183.35 | 11092.08 | 29122.00 | 14780.40 | 1576.09 |
62 rows × 24 columns
prep_merge_data_201715.columns
Index(['par_month', 'GOLD_P_last', 'OIL_P_last', 'US_10Y_P_last',
'US_2Y_P_last', 'US_3M_P_last', 'DXY_P_last', 'BTC_P_last',
'ETH_P_last', 'USDT_P_last', 'Dow_P_last', 'SP500_P_last',
'FTSE100_P_last', 'SPASX200_P_last', 'SPTSX_P_last', 'SHAI_P_last',
'CAC40_P_last', 'DAX_P_last', 'HS_P_last', 'KOSPI_P_last', 'SMI_P_last',
'NIKKEI_P_last', 'NFTY_P_last', 'SET_P_last'],
dtype='object')
prep_merge_data_new = prep_merge_data_201715[['par_month', 'GOLD_P_last', 'OIL_P_last', 'US_10Y_P_last',
'US_2Y_P_last', 'US_3M_P_last', 'DXY_P_last', 'BTC_P_last',
'ETH_P_last', 'Dow_P_last', 'SP500_P_last',
'FTSE100_P_last', 'SPASX200_P_last', 'SPTSX_P_last', 'SHAI_P_last',
'CAC40_P_last', 'DAX_P_last', 'HS_P_last', 'KOSPI_P_last', 'SMI_P_last',
'NIKKEI_P_last', 'NFTY_P_last', 'SET_P_last']]
non_par_month_col = [i for i in prep_merge_data_new.columns if i not in ['par_month']]
non_par_month_col
['GOLD_P_last', 'OIL_P_last', 'US_10Y_P_last', 'US_2Y_P_last', 'US_3M_P_last', 'DXY_P_last', 'BTC_P_last', 'ETH_P_last', 'Dow_P_last', 'SP500_P_last', 'FTSE100_P_last', 'SPASX200_P_last', 'SPTSX_P_last', 'SHAI_P_last', 'CAC40_P_last', 'DAX_P_last', 'HS_P_last', 'KOSPI_P_last', 'SMI_P_last', 'NIKKEI_P_last', 'NFTY_P_last', 'SET_P_last']
data_for_analysis_ret = prep_merge_data_new.copy()
for i in non_par_month_col:
exec('data_for_analysis_ret["{x}"] = np.log(data_for_analysis_ret["{x}"]) - np.log(data_for_analysis_ret["{x}"]).shift(1)'.format(x = i))
new_strings = []
for string in data_for_analysis_ret.columns:
new_string = string.replace("_P_last", "")
new_strings.append(new_string)
data_for_analysis_ret.columns = new_strings
non_par_month_col = [
'BTC',
'ETH',
'GOLD',
'OIL',
'US_10Y',
'US_2Y',
'US_3M',
'DXY',
'Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'SHAI',
'KOSPI',
'SMI',
'NIKKEI',
'NFTY',
'SET']
datayr = data_for_analysis_ret[['BTC','ETH','Dow','GOLD','DXY','par_month']].dropna()
datayr['year'] = datayr['par_month'].astype('str').str.slice(0, 4).astype('int')
datayr2 = datayr.groupby(['year']).mean() * 12
datayr2[['BTC', 'ETH', 'Dow', 'GOLD', 'DXY']]
| BTC | ETH | Dow | GOLD | DXY | |
|---|---|---|---|---|---|
| year | |||||
| 2016 | 1.120673 | -0.473398 | 0.148092 | -0.049196 | 0.103304 |
| 2017 | 2.665601 | 4.522834 | 0.223790 | 0.120004 | -0.102202 |
| 2018 | -1.317444 | -1.720232 | -0.057950 | -0.032716 | 0.041290 |
| 2019 | 0.662711 | -0.020605 | 0.201621 | 0.140688 | 0.002285 |
| 2020 | 1.391969 | 1.739710 | 0.069960 | 0.200630 | -0.069260 |
| 2021 | 1.895387 | 3.793880 | 0.314116 | -0.207504 | 0.035150 |
import seaborn as sns
import matplotlib.pyplot as plt
df_for_cor = data_for_analysis_ret.dropna()[non_par_month_col]
correlation_mat = df_for_cor.corr()
fig, ax = plt.subplots(figsize=(15,15))
sns.heatmap(correlation_mat, annot = True)
plt.show()
model_df= data_for_analysis_ret.dropna()
for i in non_par_month_col:
exec('model_df["{y}_1"] = model_df["{y}"].shift(1)'.format(y = i))
exec('model_df["{y}_2"] = model_df["{y}"].shift(2)'.format(y = i))
model_df_lag = model_df.dropna()
<string>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# no lag
ols_1 = OLS(model_df['BTC'] , model_df[['GOLD','Dow','DXY']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.125
Model: OLS Adj. R-squared (uncentered): 0.079
Method: Least Squares F-statistic: 2.754
Date: Fri, 23 Apr 2021 Prob (F-statistic): 0.0506
Time: 23:57:21 Log-Likelihood: 6.6334
No. Observations: 61 AIC: -7.267
Df Residuals: 58 BIC: -0.9342
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 1.1696 0.934 1.252 0.215 -0.700 3.039
Dow 1.6616 0.643 2.584 0.012 0.374 2.949
DXY 1.2741 2.067 0.616 0.540 -2.864 5.412
==============================================================================
Omnibus: 0.653 Durbin-Watson: 1.629
Prob(Omnibus): 0.721 Jarque-Bera (JB): 0.182
Skew: 0.065 Prob(JB): 0.913
Kurtosis: 3.234 Cond. No. 3.45
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# lag1 2 3 4 5
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['GOLD', 'GOLD_1','GOLD_2' ,
# 'SP500','SP500_1','SP500_2',
'Dow', 'Dow_1' , 'Dow_2' ,
'DXY', 'DXY_1' , 'DXY_2' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.308
Model: OLS Adj. R-squared (uncentered): 0.183
Method: Least Squares F-statistic: 2.473
Date: Fri, 23 Apr 2021 Prob (F-statistic): 0.0203
Time: 23:57:24 Log-Likelihood: 12.664
No. Observations: 59 AIC: -7.328
Df Residuals: 50 BIC: 11.37
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 1.3916 0.915 1.521 0.135 -0.446 3.230
GOLD_1 -2.7007 0.904 -2.987 0.004 -4.517 -0.885
GOLD_2 0.1510 0.911 0.166 0.869 -1.678 1.980
Dow 1.6271 0.635 2.564 0.013 0.352 2.902
Dow_1 0.5632 0.617 0.913 0.365 -0.675 1.802
Dow_2 0.3274 0.648 0.505 0.616 -0.974 1.629
DXY 1.6126 2.049 0.787 0.435 -2.504 5.729
DXY_1 -5.1193 2.011 -2.545 0.014 -9.159 -1.079
DXY_2 1.9281 2.091 0.922 0.361 -2.271 6.128
==============================================================================
Omnibus: 2.098 Durbin-Watson: 1.471
Prob(Omnibus): 0.350 Jarque-Bera (JB): 1.499
Skew: 0.382 Prob(JB): 0.473
Kurtosis: 3.164 Cond. No. 4.04
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 btc w stock
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['Dow','Dow_1', 'Dow_2',
'SP500','SP500_1', 'SP500_2',
'FTSE100','FTSE100_1' ,'FTSE100_2',
'SPASX200','SPASX200_1', 'SPASX200_2',
'SPTSX','SPTSX_1','SPTSX_2',
'CAC40','CAC40_1', 'CAC40_2',
'DAX','DAX_1', 'DAX_2',
'HS','HS_1', 'HS_2',
'SHAI', 'SHAI_1','SHAI_2' ,
'KOSPI','KOSPI_1', 'KOSPI_2',
'SMI','SMI_1', 'SMI_2',
'NIKKEI','NIKKEI_1','NIKKEI_2',
'NFTY_1','NFTY_1', 'NFTY_2',
'SET','SET_1', 'SET_2']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.720
Model: OLS Adj. R-squared (uncentered): 0.084
Method: Least Squares F-statistic: 1.131
Date: Fri, 23 Apr 2021 Prob (F-statistic): 0.401
Time: 23:57:25 Log-Likelihood: 39.397
No. Observations: 59 AIC: 3.207
Df Residuals: 18 BIC: 88.39
Df Model: 41
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow 0.8022 4.705 0.170 0.867 -9.083 10.687
Dow_1 -1.5617 5.566 -0.281 0.782 -13.255 10.132
Dow_2 -6.9747 5.090 -1.370 0.187 -17.668 3.718
SP500 2.6287 5.587 0.470 0.644 -9.110 14.367
SP500_1 -4.3393 6.692 -0.648 0.525 -18.399 9.720
SP500_2 8.9314 6.098 1.465 0.160 -3.880 21.743
FTSE100 0.6134 1.867 0.329 0.746 -3.309 4.535
FTSE100_1 -4.0085 2.125 -1.887 0.075 -8.473 0.455
FTSE100_2 -1.0970 1.990 -0.551 0.588 -5.278 3.084
SPASX200 4.7175 2.980 1.583 0.131 -1.543 10.978
SPASX200_1 0.9310 2.788 0.334 0.742 -4.926 6.788
SPASX200_2 0.8430 2.517 0.335 0.742 -4.446 6.132
SPTSX -4.3460 4.046 -1.074 0.297 -12.847 4.155
SPTSX_1 7.9275 4.057 1.954 0.066 -0.597 16.452
SPTSX_2 -3.2062 3.604 -0.890 0.385 -10.778 4.366
CAC40 -2.7247 2.990 -0.911 0.374 -9.007 3.558
CAC40_1 1.1256 3.193 0.352 0.729 -5.584 7.835
CAC40_2 1.7647 3.571 0.494 0.627 -5.737 9.267
DAX -0.8891 3.104 -0.286 0.778 -7.410 5.632
DAX_1 -0.9858 3.076 -0.320 0.752 -7.448 5.476
DAX_2 -1.3354 3.337 -0.400 0.694 -8.346 5.676
HS 0.5583 2.160 0.258 0.799 -3.980 5.096
HS_1 4.5706 1.870 2.444 0.025 0.642 8.499
HS_2 -1.2799 1.651 -0.775 0.448 -4.749 2.189
SHAI -2.0377 1.620 -1.258 0.225 -5.442 1.366
SHAI_1 -0.7097 1.745 -0.407 0.689 -4.376 2.957
SHAI_2 2.0092 1.681 1.195 0.248 -1.523 5.542
KOSPI 0.1346 1.847 0.073 0.943 -3.746 4.016
KOSPI_1 0.1258 2.187 0.058 0.955 -4.469 4.720
KOSPI_2 0.8957 1.923 0.466 0.647 -3.144 4.935
SMI -0.6794 2.613 -0.260 0.798 -6.170 4.811
SMI_1 2.4648 2.691 0.916 0.372 -3.189 8.119
SMI_2 1.9517 2.846 0.686 0.502 -4.027 7.930
NIKKEI 0.5597 1.679 0.333 0.743 -2.969 4.088
NIKKEI_1 1.6588 1.729 0.960 0.350 -1.973 5.291
NIKKEI_2 -0.5082 1.678 -0.303 0.765 -4.033 3.017
NFTY_1 -0.9293 0.744 -1.249 0.228 -2.492 0.634
NFTY_1 -0.9293 0.744 -1.249 0.228 -2.492 0.634
NFTY_2 -0.3668 1.422 -0.258 0.799 -3.355 2.621
SET 3.6664 1.994 1.839 0.082 -0.522 7.855
SET_1 -2.5088 2.435 -1.030 0.316 -7.624 2.606
SET_2 1.4252 2.328 0.612 0.548 -3.466 6.317
==============================================================================
Omnibus: 4.153 Durbin-Watson: 1.746
Prob(Omnibus): 0.125 Jarque-Bera (JB): 4.712
Skew: 0.013 Prob(JB): 0.0948
Kurtosis: 4.384 Cond. No. 1.24e+16
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The smallest eigenvalue is 8.99e-33. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
#2 btc w stock
ols_1 = OLS(data_for_analysis_ret.dropna()['BTC'] , data_for_analysis_ret.dropna()[['Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'SHAI',
'KOSPI',
'SMI',
'NIKKEI',
'NFTY',
'SET']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.312
Model: OLS Adj. R-squared (uncentered): 0.107
Method: Least Squares F-statistic: 1.522
Date: Fri, 23 Apr 2021 Prob (F-statistic): 0.140
Time: 23:58:20 Log-Likelihood: 13.973
No. Observations: 61 AIC: 0.05469
Df Residuals: 47 BIC: 29.61
Df Model: 14
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow 1.5205 3.081 0.493 0.624 -4.678 7.719
SP500 -1.9472 3.242 -0.601 0.551 -8.470 4.575
FTSE100 1.5095 1.361 1.109 0.273 -1.228 4.247
SPASX200 2.1797 1.433 1.521 0.135 -0.704 5.063
SPTSX -2.4140 2.059 -1.172 0.247 -6.556 1.728
CAC40 -2.5747 1.848 -1.393 0.170 -6.292 1.143
DAX -0.4926 1.902 -0.259 0.797 -4.320 3.335
HS -0.7512 1.101 -0.682 0.499 -2.967 1.464
SHAI 0.4940 1.002 0.493 0.624 -1.521 2.509
KOSPI 0.1835 1.242 0.148 0.883 -2.316 2.683
SMI 1.9185 1.740 1.103 0.276 -1.582 5.419
NIKKEI 1.0805 1.150 0.940 0.352 -1.232 3.393
NFTY 1.5486 0.855 1.812 0.076 -0.171 3.268
SET 1.0864 1.120 0.970 0.337 -1.167 3.340
==============================================================================
Omnibus: 0.469 Durbin-Watson: 1.449
Prob(Omnibus): 0.791 Jarque-Bera (JB): 0.158
Skew: 0.115 Prob(JB): 0.924
Kurtosis: 3.095 Cond. No. 22.3
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import sklearn
from sklearn.preprocessing import StandardScaler
x = model_df_lag[['Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'KOSPI',
'SMI',
'SHAI',
'NIKKEI',
'NFTY',
'SET']]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
data_for_analysis_ret.shape
(62, 23)
x_new.shape
(59, 3)
#2 btc w stock
ols_1 = OLS(model_df_lag.dropna()['BTC'] , x_new )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.130
Model: OLS Adj. R-squared (uncentered): 0.084
Method: Least Squares F-statistic: 2.796
Date: Fri, 23 Apr 2021 Prob (F-statistic): 0.0485
Time: 23:59:37 Log-Likelihood: 5.9197
No. Observations: 59 AIC: -5.839
Df Residuals: 56 BIC: 0.3933
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 -0.0221 0.009 -2.361 0.022 -0.041 -0.003
x2 0.0212 0.027 0.779 0.439 -0.033 0.076
x3 -0.0502 0.034 -1.485 0.143 -0.118 0.018
==============================================================================
Omnibus: 0.188 Durbin-Watson: 1.516
Prob(Omnibus): 0.910 Jarque-Bera (JB): 0.097
Skew: -0.095 Prob(JB): 0.953
Kurtosis: 2.939 Cond. No. 3.61
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 btc wi dxy
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['DXY' ,'DXY_1' ,'DXY_2' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.068
Model: OLS Adj. R-squared (uncentered): 0.018
Method: Least Squares F-statistic: 1.361
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.264
Time: 00:06:32 Log-Likelihood: 3.8784
No. Observations: 59 AIC: -1.757
Df Residuals: 56 BIC: 4.476
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -1.1827 1.867 -0.633 0.529 -4.923 2.558
DXY_1 -2.8417 1.861 -1.527 0.132 -6.571 0.887
DXY_2 2.3075 1.879 1.228 0.225 -1.457 6.072
==============================================================================
Omnibus: 1.877 Durbin-Watson: 1.412
Prob(Omnibus): 0.391 Jarque-Bera (JB): 1.261
Skew: 0.007 Prob(JB): 0.532
Kurtosis: 2.284 Cond. No. 1.13
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[[
# 'USDT','USDT_1', 'USDT_2', 'USDT_3', 'USDT_2', 'USDT_5',
'GOLD','GOLD_1', 'GOLD_2',
'OIL','OIL_1', 'OIL_2',
'US_10Y','US_10Y_1', 'US_10Y_2',
'US_2Y','US_2Y_1', 'US_2Y_2',
'US_3M', 'US_3M_1', 'US_3M_2' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.281
Model: OLS Adj. R-squared (uncentered): 0.036
Method: Least Squares F-statistic: 1.146
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.347
Time: 00:06:32 Log-Likelihood: 11.535
No. Observations: 59 AIC: 6.931
Df Residuals: 44 BIC: 38.09
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 0.9256 1.141 0.811 0.422 -1.374 3.225
GOLD_1 -2.0443 1.147 -1.783 0.081 -4.355 0.266
GOLD_2 -1.3853 1.082 -1.280 0.207 -3.566 0.795
OIL 0.4363 0.340 1.283 0.206 -0.249 1.122
OIL_1 0.3128 0.294 1.063 0.293 -0.280 0.906
OIL_2 -0.0373 0.285 -0.131 0.896 -0.611 0.537
US_10Y 0.0819 0.512 0.160 0.874 -0.951 1.115
US_10Y_1 0.3407 0.528 0.646 0.522 -0.723 1.404
US_10Y_2 0.0836 0.507 0.165 0.870 -0.938 1.105
US_2Y -0.2354 0.452 -0.521 0.605 -1.146 0.675
US_2Y_1 -0.2718 0.425 -0.640 0.526 -1.128 0.584
US_2Y_2 -0.6340 0.463 -1.370 0.178 -1.566 0.298
US_3M 0.1106 0.192 0.576 0.568 -0.277 0.498
US_3M_1 -0.0767 0.196 -0.391 0.698 -0.472 0.319
US_3M_2 0.4614 0.238 1.937 0.059 -0.019 0.942
==============================================================================
Omnibus: 0.358 Durbin-Watson: 1.468
Prob(Omnibus): 0.836 Jarque-Bera (JB): 0.528
Skew: 0.125 Prob(JB): 0.768
Kurtosis: 2.609 Cond. No. 19.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[[
'GOLD', 'GOLD_1','GOLD_2',
'OIL', 'OIL_1','OIL_2',
'US_10Y','US_10Y_1','US_10Y_2',
'US_2Y','US_2Y_1','US_2Y_2',
'US_3M', 'US_3M_1' ,'US_3M_2'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.281
Model: OLS Adj. R-squared (uncentered): 0.036
Method: Least Squares F-statistic: 1.146
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.347
Time: 00:06:33 Log-Likelihood: 11.535
No. Observations: 59 AIC: 6.931
Df Residuals: 44 BIC: 38.09
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 0.9256 1.141 0.811 0.422 -1.374 3.225
GOLD_1 -2.0443 1.147 -1.783 0.081 -4.355 0.266
GOLD_2 -1.3853 1.082 -1.280 0.207 -3.566 0.795
OIL 0.4363 0.340 1.283 0.206 -0.249 1.122
OIL_1 0.3128 0.294 1.063 0.293 -0.280 0.906
OIL_2 -0.0373 0.285 -0.131 0.896 -0.611 0.537
US_10Y 0.0819 0.512 0.160 0.874 -0.951 1.115
US_10Y_1 0.3407 0.528 0.646 0.522 -0.723 1.404
US_10Y_2 0.0836 0.507 0.165 0.870 -0.938 1.105
US_2Y -0.2354 0.452 -0.521 0.605 -1.146 0.675
US_2Y_1 -0.2718 0.425 -0.640 0.526 -1.128 0.584
US_2Y_2 -0.6340 0.463 -1.370 0.178 -1.566 0.298
US_3M 0.1106 0.192 0.576 0.568 -0.277 0.498
US_3M_1 -0.0767 0.196 -0.391 0.698 -0.472 0.319
US_3M_2 0.4614 0.238 1.937 0.059 -0.019 0.942
==============================================================================
Omnibus: 0.358 Durbin-Watson: 1.468
Prob(Omnibus): 0.836 Jarque-Bera (JB): 0.528
Skew: 0.125 Prob(JB): 0.768
Kurtosis: 2.609 Cond. No. 19.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[['DXY',
'GOLD',
'OIL',
'US_10Y',
'US_2Y',
'US_3M'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.094
Model: OLS Adj. R-squared (uncentered): -0.008
Method: Least Squares F-statistic: 0.9198
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.488
Time: 00:07:05 Log-Likelihood: 4.7242
No. Observations: 59 AIC: 2.552
Df Residuals: 53 BIC: 15.02
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY 0.9200 2.259 0.407 0.685 -3.610 5.450
GOLD 1.8658 1.148 1.626 0.110 -0.436 4.167
OIL 0.3128 0.273 1.146 0.257 -0.234 0.860
US_10Y 0.2886 0.442 0.654 0.516 -0.597 1.174
US_2Y 0.0162 0.368 0.044 0.965 -0.722 0.755
US_3M -0.0930 0.177 -0.526 0.601 -0.447 0.261
==============================================================================
Omnibus: 0.604 Durbin-Watson: 1.531
Prob(Omnibus): 0.739 Jarque-Bera (JB): 0.494
Skew: 0.218 Prob(JB): 0.781
Kurtosis: 2.899 Cond. No. 31.0
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#4 with other cryto
ols_1 = OLS(model_df_lag['BTC'] , model_df_lag[[
'BTC_1','BTC_2',
'ETH','ETH_1','ETH_2',
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: BTC R-squared (uncentered): 0.417
Model: OLS Adj. R-squared (uncentered): 0.363
Method: Least Squares F-statistic: 7.718
Date: Sat, 24 Apr 2021 Prob (F-statistic): 1.56e-05
Time: 00:06:37 Log-Likelihood: 17.708
No. Observations: 59 AIC: -25.42
Df Residuals: 54 BIC: -15.03
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
BTC_1 0.1112 0.138 0.803 0.426 -0.166 0.389
BTC_2 0.1202 0.139 0.868 0.389 -0.158 0.398
ETH 0.3835 0.071 5.422 0.000 0.242 0.525
ETH_1 -0.0848 0.087 -0.975 0.334 -0.259 0.090
ETH_2 0.0315 0.086 0.367 0.715 -0.141 0.204
==============================================================================
Omnibus: 11.392 Durbin-Watson: 1.963
Prob(Omnibus): 0.003 Jarque-Bera (JB): 11.824
Skew: -0.894 Prob(JB): 0.00271
Kurtosis: 4.270 Cond. No. 3.11
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#ETH
# no lag
ols_1 = OLS(model_df['ETH'] , model_df[['GOLD','Dow','DXY']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.090
Model: OLS Adj. R-squared (uncentered): 0.043
Method: Least Squares F-statistic: 1.906
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.139
Time: 00:07:22 Log-Likelihood: -22.315
No. Observations: 61 AIC: 50.63
Df Residuals: 58 BIC: 56.96
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 0.2170 1.501 0.145 0.886 -2.788 3.222
Dow 1.3451 1.034 1.301 0.198 -0.724 3.414
DXY -4.4661 3.322 -1.344 0.184 -11.117 2.184
==============================================================================
Omnibus: 4.790 Durbin-Watson: 1.850
Prob(Omnibus): 0.091 Jarque-Bera (JB): 3.818
Skew: 0.511 Prob(JB): 0.148
Kurtosis: 3.676 Cond. No. 3.45
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
data_for_analysis_ret.dropna()
# lag1 2 3 4 5
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['GOLD', 'GOLD_1','GOLD_2' ,
# 'SP500','SP500_1','SP500_2',
'Dow', 'Dow_1' , 'Dow_2' ,
'DXY', 'DXY_1' , 'DXY_2' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.207
Model: OLS Adj. R-squared (uncentered): 0.064
Method: Least Squares F-statistic: 1.447
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.194
Time: 00:07:40 Log-Likelihood: -17.509
No. Observations: 59 AIC: 53.02
Df Residuals: 50 BIC: 71.72
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 0.4282 1.526 0.281 0.780 -2.637 3.493
GOLD_1 1.1564 1.508 0.767 0.447 -1.872 4.185
GOLD_2 -0.7969 1.519 -0.525 0.602 -3.847 2.253
Dow 0.9693 1.058 0.916 0.364 -1.156 3.095
Dow_1 1.0408 1.028 1.012 0.316 -1.024 3.106
Dow_2 0.1736 1.081 0.161 0.873 -1.997 2.344
DXY -5.4044 3.418 -1.581 0.120 -12.269 1.460
DXY_1 -3.2034 3.354 -0.955 0.344 -9.941 3.534
DXY_2 1.1473 3.487 0.329 0.743 -5.856 8.151
==============================================================================
Omnibus: 7.204 Durbin-Watson: 1.714
Prob(Omnibus): 0.027 Jarque-Bera (JB): 6.626
Skew: 0.624 Prob(JB): 0.0364
Kurtosis: 4.066 Cond. No. 4.04
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 ETH w stock
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['Dow','Dow_1', 'Dow_2',
'SP500','SP500_1', 'SP500_2',
'FTSE100','FTSE100_1' ,'FTSE100_2',
'SPASX200','SPASX200_1', 'SPASX200_2',
'SPTSX','SPTSX_1','SPTSX_2',
'CAC40','CAC40_1', 'CAC40_2',
'DAX','DAX_1', 'DAX_2',
'HS','HS_1', 'HS_2',
'KOSPI','KOSPI_1', 'KOSPI_2',
'SMI','SMI_1', 'SMI_2',
'SHAI' , 'SHAI_1','SHAI_2',
'NIKKEI','NIKKEI_1','NIKKEI_2',
'NFTY_1','NFTY_1', 'NFTY_2',
'SET','SET_1', 'SET_2']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.694
Model: OLS Adj. R-squared (uncentered): -0.002
Method: Least Squares F-statistic: 0.9971
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.524
Time: 00:07:41 Log-Likelihood: 10.623
No. Observations: 59 AIC: 60.75
Df Residuals: 18 BIC: 145.9
Df Model: 41
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow 0.1994 7.663 0.026 0.980 -15.899 16.298
Dow_1 3.7415 9.064 0.413 0.685 -15.302 22.785
Dow_2 -5.4353 8.289 -0.656 0.520 -22.849 11.979
SP500 3.3487 9.099 0.368 0.717 -15.768 22.466
SP500_1 -5.3693 10.898 -0.493 0.628 -28.266 17.527
SP500_2 8.5455 9.931 0.860 0.401 -12.319 29.410
FTSE100 -0.0107 3.040 -0.004 0.997 -6.398 6.377
FTSE100_1 -3.0015 3.460 -0.867 0.397 -10.271 4.268
FTSE100_2 -2.6835 3.241 -0.828 0.419 -9.492 4.125
SPASX200 7.4615 4.853 1.538 0.142 -2.734 17.657
SPASX200_1 3.8590 4.540 0.850 0.407 -5.680 13.398
SPASX200_2 3.6061 4.100 0.880 0.391 -5.007 12.220
SPTSX -10.4558 6.590 -1.587 0.130 -24.300 3.389
SPTSX_1 2.4972 6.608 0.378 0.710 -11.385 16.380
SPTSX_2 -6.7739 5.869 -1.154 0.264 -19.105 5.557
CAC40 -0.5537 4.870 -0.114 0.911 -10.785 9.677
CAC40_1 -2.6068 5.201 -0.501 0.622 -13.533 8.320
CAC40_2 6.2013 5.815 1.066 0.300 -6.016 18.419
DAX 4.6674 5.055 0.923 0.368 -5.952 15.287
DAX_1 4.4833 5.009 0.895 0.383 -6.041 15.007
DAX_2 -5.7557 5.435 -1.059 0.304 -17.174 5.662
HS 1.5613 3.518 0.444 0.662 -5.829 8.952
HS_1 5.9832 3.045 1.965 0.065 -0.415 12.381
HS_2 0.0673 2.689 0.025 0.980 -5.582 5.717
KOSPI 1.9285 3.008 0.641 0.530 -4.392 8.249
KOSPI_1 1.4845 3.561 0.417 0.682 -5.998 8.967
KOSPI_2 2.7584 3.131 0.881 0.390 -3.820 9.337
SMI -3.0803 4.256 -0.724 0.479 -12.022 5.862
SMI_1 2.0787 4.383 0.474 0.641 -7.129 11.287
SMI_2 2.9618 4.634 0.639 0.531 -6.775 12.698
SHAI -1.8247 2.639 -0.692 0.498 -7.368 3.719
SHAI_1 -1.1152 2.842 -0.392 0.699 -7.087 4.856
SHAI_2 -0.0117 2.738 -0.004 0.997 -5.765 5.741
NIKKEI -3.0811 2.735 -1.127 0.275 -8.827 2.665
NIKKEI_1 -0.0108 2.815 -0.004 0.997 -5.926 5.904
NIKKEI_2 -1.9142 2.733 -0.700 0.493 -7.655 3.827
NFTY_1 -0.7203 1.211 -0.595 0.560 -3.265 1.825
NFTY_1 -0.7203 1.211 -0.595 0.560 -3.265 1.825
NFTY_2 -2.1800 2.316 -0.941 0.359 -7.046 2.686
SET 2.4624 3.247 0.758 0.458 -4.359 9.284
SET_1 -8.3703 3.965 -2.111 0.049 -16.700 -0.040
SET_2 2.2622 3.792 0.597 0.558 -5.704 10.228
==============================================================================
Omnibus: 1.699 Durbin-Watson: 1.626
Prob(Omnibus): 0.428 Jarque-Bera (JB): 0.959
Skew: 0.233 Prob(JB): 0.619
Kurtosis: 3.416 Cond. No. 1.24e+16
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The smallest eigenvalue is 8.99e-33. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
#2 ETH w stock
ols_1 = OLS(data_for_analysis_ret.dropna()['ETH'] , data_for_analysis_ret.dropna()[['Dow',
'SP500',
'FTSE100',
'SPASX200',
'SPTSX',
'CAC40',
'DAX',
'HS',
'KOSPI',
'SMI',
'SHAI' ,
'NIKKEI',
'NFTY',
'SET']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.339
Model: OLS Adj. R-squared (uncentered): 0.142
Method: Least Squares F-statistic: 1.720
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.0834
Time: 00:07:42 Log-Likelihood: -12.567
No. Observations: 61 AIC: 53.13
Df Residuals: 47 BIC: 82.69
Df Model: 14
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Dow -3.4141 4.761 -0.717 0.477 -12.991 6.163
SP500 2.7147 5.009 0.542 0.590 -7.363 12.792
FTSE100 0.2637 2.102 0.125 0.901 -3.966 4.493
SPASX200 5.7094 2.215 2.578 0.013 1.254 10.165
SPTSX -8.3158 3.181 -2.614 0.012 -14.716 -1.916
CAC40 -1.8236 2.855 -0.639 0.526 -7.567 3.920
DAX 4.4464 2.939 1.513 0.137 -1.467 10.360
HS 0.7848 1.701 0.461 0.647 -2.638 4.208
KOSPI 1.9225 1.920 1.002 0.322 -1.939 5.784
SMI 0.8501 2.688 0.316 0.753 -4.558 6.258
SHAI 0.9573 1.548 0.619 0.539 -2.156 4.071
NIKKEI -2.3143 1.776 -1.303 0.199 -5.887 1.259
NFTY 1.2871 1.321 0.975 0.335 -1.369 3.944
SET 0.1555 1.731 0.090 0.929 -3.326 3.637
==============================================================================
Omnibus: 2.542 Durbin-Watson: 1.589
Prob(Omnibus): 0.281 Jarque-Bera (JB): 2.054
Skew: 0.449 Prob(JB): 0.358
Kurtosis: 3.054 Cond. No. 22.3
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 btc w stock
ols_1 = OLS(model_df_lag.dropna()['ETH'] , x_new )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.139
Model: OLS Adj. R-squared (uncentered): 0.092
Method: Least Squares F-statistic: 3.001
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.0380
Time: 00:08:10 Log-Likelihood: -19.940
No. Observations: 59 AIC: 45.88
Df Residuals: 56 BIC: 52.11
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
x1 -0.0331 0.015 -2.276 0.027 -0.062 -0.004
x2 -0.0304 0.042 -0.721 0.474 -0.115 0.054
x3 -0.0952 0.052 -1.817 0.075 -0.200 0.010
==============================================================================
Omnibus: 5.120 Durbin-Watson: 1.536
Prob(Omnibus): 0.077 Jarque-Bera (JB): 4.302
Skew: 0.479 Prob(JB): 0.116
Kurtosis: 3.913 Cond. No. 3.61
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#2 ETH wi dxy
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['DXY' ,'DXY_1' ,'DXY_2' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.161
Model: OLS Adj. R-squared (uncentered): 0.116
Method: Least Squares F-statistic: 3.593
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.0190
Time: 00:08:14 Log-Likelihood: -19.145
No. Observations: 59 AIC: 44.29
Df Residuals: 56 BIC: 50.52
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -6.6780 2.758 -2.421 0.019 -12.204 -1.152
DXY_1 -5.3516 2.750 -1.946 0.057 -10.860 0.157
DXY_2 2.1588 2.776 0.778 0.440 -3.403 7.721
==============================================================================
Omnibus: 8.348 Durbin-Watson: 1.687
Prob(Omnibus): 0.015 Jarque-Bera (JB): 9.256
Skew: 0.584 Prob(JB): 0.00977
Kurtosis: 4.550 Cond. No. 1.13
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[[
# 'USDT','USDT_1', 'USDT_2', 'USDT_3', 'USDT_2', 'USDT_5',
'GOLD','GOLD_1', 'GOLD_2',
'OIL','OIL_1', 'OIL_2',
'US_10Y','US_10Y_1', 'US_10Y_2',
'US_2Y','US_2Y_1', 'US_2Y_2',
'US_3M', 'US_3M_1', 'US_3M_2' ]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.176
Model: OLS Adj. R-squared (uncentered): -0.105
Method: Least Squares F-statistic: 0.6276
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.836
Time: 00:08:14 Log-Likelihood: -18.619
No. Observations: 59 AIC: 67.24
Df Residuals: 44 BIC: 98.40
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 1.5751 1.902 0.828 0.412 -2.258 5.409
GOLD_1 2.4570 1.911 1.285 0.205 -1.395 6.309
GOLD_2 -1.4997 1.804 -0.831 0.410 -5.135 2.136
OIL 0.2183 0.567 0.385 0.702 -0.924 1.361
OIL_1 0.2108 0.490 0.430 0.669 -0.778 1.199
OIL_2 -0.0435 0.475 -0.092 0.927 -1.000 0.913
US_10Y 0.5264 0.854 0.616 0.541 -1.195 2.248
US_10Y_1 0.1926 0.880 0.219 0.828 -1.580 1.965
US_10Y_2 0.5217 0.845 0.618 0.540 -1.181 2.224
US_2Y -0.6763 0.753 -0.898 0.374 -2.194 0.841
US_2Y_1 0.3439 0.708 0.486 0.630 -1.083 1.771
US_2Y_2 -0.8256 0.771 -1.071 0.290 -2.380 0.729
US_3M 0.2830 0.320 0.883 0.382 -0.363 0.929
US_3M_1 -0.3190 0.327 -0.976 0.334 -0.978 0.340
US_3M_2 0.5495 0.397 1.384 0.173 -0.251 1.350
==============================================================================
Omnibus: 2.993 Durbin-Watson: 1.480
Prob(Omnibus): 0.224 Jarque-Bera (JB): 2.153
Skew: 0.434 Prob(JB): 0.341
Kurtosis: 3.347 Cond. No. 19.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(pca.explained_variance_ratio_)
[0.69520031 0.08261089 0.05345499]
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[[
'GOLD', 'GOLD_1','GOLD_2',
'OIL', 'OIL_1','OIL_2',
'US_10Y','US_10Y_1','US_10Y_2',
'US_2Y','US_2Y_1','US_2Y_2',
'US_3M', 'US_3M_1' ,'US_3M_2'
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.176
Model: OLS Adj. R-squared (uncentered): -0.105
Method: Least Squares F-statistic: 0.6276
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.836
Time: 00:08:34 Log-Likelihood: -18.619
No. Observations: 59 AIC: 67.24
Df Residuals: 44 BIC: 98.40
Df Model: 15
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
GOLD 1.5751 1.902 0.828 0.412 -2.258 5.409
GOLD_1 2.4570 1.911 1.285 0.205 -1.395 6.309
GOLD_2 -1.4997 1.804 -0.831 0.410 -5.135 2.136
OIL 0.2183 0.567 0.385 0.702 -0.924 1.361
OIL_1 0.2108 0.490 0.430 0.669 -0.778 1.199
OIL_2 -0.0435 0.475 -0.092 0.927 -1.000 0.913
US_10Y 0.5264 0.854 0.616 0.541 -1.195 2.248
US_10Y_1 0.1926 0.880 0.219 0.828 -1.580 1.965
US_10Y_2 0.5217 0.845 0.618 0.540 -1.181 2.224
US_2Y -0.6763 0.753 -0.898 0.374 -2.194 0.841
US_2Y_1 0.3439 0.708 0.486 0.630 -1.083 1.771
US_2Y_2 -0.8256 0.771 -1.071 0.290 -2.380 0.729
US_3M 0.2830 0.320 0.883 0.382 -0.363 0.929
US_3M_1 -0.3190 0.327 -0.976 0.334 -0.978 0.340
US_3M_2 0.5495 0.397 1.384 0.173 -0.251 1.350
==============================================================================
Omnibus: 2.993 Durbin-Watson: 1.480
Prob(Omnibus): 0.224 Jarque-Bera (JB): 2.153
Skew: 0.434 Prob(JB): 0.341
Kurtosis: 3.347 Cond. No. 19.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#3 Cryto with alternative
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[['DXY',
'GOLD',
'OIL',
'US_10Y',
'US_2Y',
'US_3M',
]] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.125
Model: OLS Adj. R-squared (uncentered): 0.026
Method: Least Squares F-statistic: 1.263
Date: Sat, 24 Apr 2021 Prob (F-statistic): 0.290
Time: 00:16:06 Log-Likelihood: -20.397
No. Observations: 59 AIC: 52.79
Df Residuals: 53 BIC: 65.26
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
DXY -6.2761 3.457 -1.815 0.075 -13.211 0.658
GOLD 1.1892 1.757 0.677 0.501 -2.334 4.713
OIL -0.1318 0.418 -0.315 0.754 -0.969 0.706
US_10Y 0.4109 0.676 0.608 0.546 -0.945 1.767
US_2Y -0.1106 0.564 -0.196 0.845 -1.241 1.020
US_3M 0.1475 0.271 0.545 0.588 -0.395 0.690
==============================================================================
Omnibus: 2.895 Durbin-Watson: 1.720
Prob(Omnibus): 0.235 Jarque-Bera (JB): 2.007
Skew: 0.324 Prob(JB): 0.367
Kurtosis: 3.630 Cond. No. 31.0
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
#4 with other cryto
ols_1 = OLS(model_df_lag['ETH'] , model_df_lag[[
'ETH_1','ETH_2',
'BTC','BTC_1','BTC_2']] )
results_1 = ols_1.fit()
print(results_1.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: ETH R-squared (uncentered): 0.424
Model: OLS Adj. R-squared (uncentered): 0.370
Method: Least Squares F-statistic: 7.936
Date: Sat, 24 Apr 2021 Prob (F-statistic): 1.16e-05
Time: 00:08:15 Log-Likelihood: -8.0875
No. Observations: 59 AIC: 26.18
Df Residuals: 54 BIC: 36.56
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
ETH_1 0.1126 0.135 0.834 0.408 -0.158 0.383
ETH_2 0.0921 0.133 0.695 0.490 -0.174 0.358
BTC 0.9193 0.170 5.422 0.000 0.579 1.259
BTC_1 0.1528 0.215 0.712 0.480 -0.278 0.583
BTC_2 -0.1951 0.214 -0.910 0.367 -0.625 0.235
==============================================================================
Omnibus: 21.241 Durbin-Watson: 1.867
Prob(Omnibus): 0.000 Jarque-Bera (JB): 36.451
Skew: 1.191 Prob(JB): 1.22e-08
Kurtosis: 6.026 Cond. No. 2.97
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
col_all = ['GOLD', 'OIL', 'US_10Y', 'US_2Y', 'US_3M', 'DXY', 'BTC',
'ETH', 'SHAI', 'Dow', 'SP500', 'FTSE100', 'SPASX200', 'SPTSX', 'CAC40',
'DAX', 'HS', 'KOSPI', 'SMI', 'NIKKEI', 'NFTY', 'SET']
col_1 = ['BTC','ETH']
col_2 = ['BTC','GOLD','US_3M','Dow','DXY','ETH']
col_3 = ['ETH','GOLD', 'US_3M','Dow','DXY','ETH']
# PCA w/o outliner
dff = data_for_analysis_ret.dropna()
dff2 = data_for_analysis_ret.dropna()
for i in ['BTC','GOLD','US_3M','Dow','DXY','ETH']:
dff = dff[ (abs(dff[i]) < abs(np.percentile(dff[i], 99)) ) ]
dff2 = dff2[ (abs(dff2[i]) < abs(np.percentile(dff2[i], 80)) ) ]
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_1]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-97-3365679d35fb> in <module> 8 from sklearn.decomposition import PCA 9 pca = PCA(n_components=3) ---> 10 x_new = pca.fit_transform(x) 11 12 ~\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py in fit_transform(self, X, y) 374 C-ordered array, use 'np.ascontiguousarray'. 375 """ --> 376 U, S, V = self._fit(X) 377 U = U[:, :self.n_components_] 378 ~\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py in _fit(self, X) 421 # Call different fits for either full or truncated SVD 422 if self._fit_svd_solver == 'full': --> 423 return self._fit_full(X, n_components) 424 elif self._fit_svd_solver in ['arpack', 'randomized']: 425 return self._fit_truncated(X, n_components, self._fit_svd_solver) ~\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py in _fit_full(self, X, n_components) 437 "if n_samples >= n_features") 438 elif not 0 <= n_components <= min(n_samples, n_features): --> 439 raise ValueError("n_components=%r must be between 0 and " 440 "min(n_samples, n_features)=%r with " 441 "svd_solver='full'" ValueError: n_components=3 must be between 0 and min(n_samples, n_features)=2 with svd_solver='full'
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 1.15, coeff[i,1] * 1.15, str(col_1[i]), color = 'BLACK', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-99-85719136021c> in <module> 2 mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style 3 # Call the biplot function for only the first 2 PCs ----> 4 biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None") 5 plt.show() AttributeError: 'PCA' object has no attribute 'components_'
#2 PCA BTC on ASSET type
import sklearn
from sklearn.preprocessing import StandardScaler
x = dff.dropna().iloc[:,1:][col_2]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
[0.3386318 0.24764781 0.18424244]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(2), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,1:3], np.transpose(pca.components_[1:3, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 3, coeff[i,1] * 3, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,[0,2]], np.transpose(pca.components_[[0,2], :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 1.15, coeff[i,1] * 1.15, str(col_1[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-109-85719136021c> in <module> 2 mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style 3 # Call the biplot function for only the first 2 PCs ----> 4 biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None") 5 plt.show() AttributeError: 'PCA' object has no attribute 'components_'
#2 PCA on asset type
# PCA w outliner
import sklearn
from sklearn.preprocessing import StandardScaler
x = data_for_analysis_ret.dropna().iloc[:,1:][col_2]
x = StandardScaler().fit_transform(x)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
x_new = pca.fit_transform(x)
print(pca.explained_variance_ratio_)
[0.33523871 0.24399451 0.17990774]
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(2), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,0:2], np.transpose(pca.components_[0:2, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(2), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,1:3], np.transpose(pca.components_[1:3, :]), y = "None")
plt.show()
def biplot(score, coeff , y):
'''
Author: Serafeim Loukas, serafeim.loukas@epfl.ch
Inputs:
score: the projected data
coeff: the eigenvectors (PCs)
y: the class labels
'''
xs = score[:,0] # projection on PC1
ys = score[:,1] # projection on PC2
n = coeff.shape[0] # number of variables
plt.figure(figsize=(10,8), dpi=100)
classes = np.unique(y)
colors = ['g','r','y']
markers=['o','^','x']
for s,l in enumerate(classes):
plt.scatter(xs[y==l],ys[y==l], c = colors[s], marker=markers[s]) # color based on group
for i in range(n):
#plot as arrows the variable scores (each variable has a score for PC1 and one for PC2)
plt.arrow(0, 0, coeff[i,0]*3, coeff[i,1]*3, color = 'RED', alpha = 0.9,linestyle = '-',linewidth = 1.5, overhang=0.2)
plt.text(coeff[i,0]* 5, coeff[i,1] * 5, str(col_2[i]), color = 'k', ha = 'center', va = 'center',fontsize=15)
plt.xlabel("PC{}".format(1), size=14)
plt.ylabel("PC{}".format(3), size=14)
limx= int(xs.max()) + 1
limy= int(ys.max()) + 1
plt.xlim([-limx,limx])
plt.ylim([-limy,limy])
plt.grid()
plt.tick_params(axis='both', which='both', labelsize=14)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) # reset ggplot style
# Call the biplot function for only the first 2 PCs
biplot(x_new[:,[0,2]], np.transpose(pca.components_[[0,2], :]), y = "None")
plt.show()